{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T04:27:10Z","timestamp":1765254430801,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":55,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681548","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:27Z","timestamp":1729925967000},"page":"9933-9942","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Document Registration: Towards Automated Labeling of Pixel-Level Alignment Between Warped-Flat Documents"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-8783-0326","authenticated-orcid":false,"given":"Weiguang","family":"Zhang","sequence":"first","affiliation":[{"name":"Xi'an Jiaotong-Liverpool University, Suzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0918-4606","authenticated-orcid":false,"given":"Qiufeng","family":"Wang","sequence":"additional","affiliation":[{"name":"Xi'an Jiaotong-Liverpool University, Suzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3034-9639","authenticated-orcid":false,"given":"Kaizhu","family":"Huang","sequence":"additional","affiliation":[{"name":"Duke Kunshan University, Suzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6267-0366","authenticated-orcid":false,"given":"Xiaowei","family":"Huang","sequence":"additional","affiliation":[{"name":"University of Liverpool, Liverpool, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1984-3027","authenticated-orcid":false,"given":"Fengjun","family":"Guo","sequence":"additional","affiliation":[{"name":"IntSig Information Co., Ltd., Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1163-401X","authenticated-orcid":false,"given":"Xiaomeng","family":"Gu","sequence":"additional","affiliation":[{"name":"IntSig Information Co., Ltd., Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2019.2897538"},{"key":"e_1_3_2_1_2_1","volume-title":"Beit: Bert pre-training of image transformers. arXiv preprint arXiv:2106.08254","author":"Bao Hangbo","year":"2021","unstructured":"Hangbo Bao, Li Dong, Songhao Piao, and Furu Wei. 2021. Beit: Bert pre-training of image transformers. arXiv preprint arXiv:2106.08254 (2021)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/11744023_32"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.bspc.2023.104594"},{"key":"e_1_3_2_1_5_1","volume-title":"Medical Image Analysis","volume":"82","author":"Chen Junyu","year":"2022","unstructured":"Junyu Chen, Eric C. Frey, Yufan He, William P. Segars, Ye Li, and Yong Du. 2022. TransMorph: Transformer for unsupervised medical image registration. Medical Image Analysis, Vol. 82 (Nov. 2022), 102615."},{"key":"e_1_3_2_1_6_1","volume-title":"A Simple Framework for Contrastive Learning of Visual Representations. arXiv preprint arXiv:2002.05709","author":"Chen Ting","year":"2020","unstructured":"Ting Chen, Simon Kornblith, Mohammad Norouzi, and Geoffrey Hinton. 2020. A Simple Framework for Contrastive Learning of Visual Representations. arXiv preprint arXiv:2002.05709 (2020)."},{"key":"e_1_3_2_1_7_1","unstructured":"Xiaokang Chen Mingyu Ding Xiaodi Wang Ying Xin Shentong Mo Yunhao Wang Shumin Han Ping Luo Gang Zeng and Jingdong Wang. 2023. Context Autoencoder for Self-Supervised Representation Learning. http:\/\/arxiv.org\/abs\/2202.03026 arXiv:2202.03026 [cs]."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00022"},{"key":"e_1_3_2_1_9_1","volume-title":"International Conference on Learning Representations(ICLR).","author":"Dosovitskiy Alexey","year":"2021","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, et al. 2021. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. In International Conference on Learning Representations(ICLR)."},{"key":"e_1_3_2_1_10_1","volume-title":"DKM: Dense Kernelized Feature Matching for Geometry Estimation. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition(CVPR).","author":"Edstedt Johan","year":"2023","unstructured":"Johan Edstedt, Ioannis Athanasiadis, M\u00e5rten Wadenb\u00e4ck, and Michael Felsberg. 2023. DKM: Dense Kernelized Feature Matching for Geometry Estimation. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition(CVPR)."},{"key":"e_1_3_2_1_11_1","volume-title":"RoMa: Robust Dense Feature Matching. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition(CVPR).","author":"Edstedt Johan","year":"2024","unstructured":"Johan Edstedt, Qiyu Sun, Georg B\u00f6kman, M\u00e5rten Wadenb\u00e4ck, and Michael Felsberg. 2024. RoMa: Robust Dense Feature Matching. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition(CVPR)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475388"},{"key":"e_1_3_2_1_13_1","unstructured":"Peng Gao Teli Ma Hongsheng Li Ziyi Lin Jifeng Dai and Yu Qiao. 2022. ConvMAE: Masked Convolution Meets Masked Autoencoders. http:\/\/arxiv.org\/abs\/2205.03892 arXiv:2205.03892 [cs]."},{"key":"e_1_3_2_1_14_1","volume-title":"International Conference on Learning Representations(ICLR).","author":"Geirhos Robert","year":"2018","unstructured":"Robert Geirhos, Patricia Rubisch, Claudio Michaelis, Matthias Bethge, Felix A Wichmann, and Wieland Brendel. 2018. ImageNet-trained CNNs are biased towards texture; increasing shape bias improves accuracy and robustness. In International Conference on Learning Representations(ICLR)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10032-023-00434-x"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"crossref","unstructured":"Jisoo Jeong Hong Cai Risheek Garrepalli and Fatih Porikli. 2023. DistractFlow: Improving Optical Flow Estimation via Realistic Distractions and Pseudo-Labeling. http:\/\/arxiv.org\/abs\/2303.14078 arXiv:2303.14078 [cs].","DOI":"10.1109\/CVPR52729.2023.01316"},{"key":"e_1_3_2_1_19_1","unstructured":"Puhua Jiang Mingze Sun and Ruqi Huang. 2023. Non-Rigid Shape Registration via Deep Functional Maps Prior. http:\/\/arxiv.org\/abs\/2311.04494"},{"key":"e_1_3_2_1_20_1","volume-title":"DiffuseMorph: Unsupervised Deformable Image Registration Using Diffusion Model. In European Conference on Computer Vision(ECCV).","volume":"13691","author":"Kim Boah","year":"2022","unstructured":"Boah Kim, Inhwa Han, and Jong Chul Ye. 2022. DiffuseMorph: Unsupervised Deformable Image Registration Using Diffusion Model. In European Conference on Computer Vision(ECCV). Vol. 13691. Springer Nature Switzerland, Cham, 347--364."},{"key":"e_1_3_2_1_21_1","first-page":"707","article-title":"Binary codes capable of correcting deletions, insertions, and reversals","volume":"10","author":"Levenshtein Vladimir I","year":"1966","unstructured":"Vladimir I Levenshtein et al. 1966. Binary codes capable of correcting deletions, insertions, and reversals. Soviet Physics Doklady, Vol. 10, 8 (1966), 707--710.","journal-title":"Soviet Physics Doklady"},{"key":"e_1_3_2_1_22_1","volume-title":"Efficient self-supervised vision transformers for representation learning. arXiv preprint arXiv:2106.09785","author":"Li Chunyuan","year":"2021","unstructured":"Chunyuan Li, Jianwei Yang, Pengchuan Zhang, Mei Gao, Bin Xiao, Xiyang Dai, Lu Yuan, and Jianfeng Gao. 2021. Efficient self-supervised vision transformers for representation learning. arXiv preprint arXiv:2106.09785 (2021)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-63833-7_13"},{"key":"e_1_3_2_1_24_1","article-title":"Layout-Aware Single-Image Document Flattening","volume":"43","author":"Li Pu","year":"2023","unstructured":"Pu Li, Weize Quan, Jianwei Guo, and Dong-Ming Yan. 2023. Layout-Aware Single-Image Document Flattening. ACM Transactions on Graphics(TOG), Vol. 43, 1 (2023).","journal-title":"ACM Transactions on Graphics(TOG)"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01200"},{"key":"e_1_3_2_1_26_1","first-page":"1","article-title":"Document rectification and illumination correction using a patch-based CNN","volume":"38","author":"Li Xiaoyu","year":"2019","unstructured":"Xiaoyu Li, Bo Zhang, Jing Liao, and Pedro V Sander. 2019. Document rectification and illumination correction using a patch-based CNN. ACM Transactions on Graphics(TOG), Vol. 38, 6 (2019), 1--11.","journal-title":"ACM Transactions on Graphics(TOG)"},{"key":"e_1_3_2_1_27_1","unstructured":"Zinuo Li Xuhang Chen Chi-Man Pun and Xiaodong Cun. 2023. High-Resolution Document Shadow Removal via A Large-Scale Real-World Dataset and A Frequency-Aware Shadow Erasing Net. http:\/\/arxiv.org\/abs\/2308.14221 arXiv:2308.14221 [cs]."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICME55011.2023.00278"},{"key":"e_1_3_2_1_29_1","volume-title":"Self-emd: Self-supervised object detection without imagenet. arXiv preprint arXiv:2011.13677","author":"Liu Songtao","year":"2020","unstructured":"Songtao Liu, Zeming Li, and Jian Sun. 2020. Self-emd: Self-supervised object detection without imagenet. arXiv preprint arXiv:2011.13677 (2020)."},{"key":"e_1_3_2_1_30_1","volume-title":"Lianwen Jin, and Xiang Bai.","author":"Liu Yuliang","year":"2024","unstructured":"Yuliang Liu, Zhang Li, Biao Yang, Chunyuan Li, Xucheng Yin, Cheng lin Liu, Lianwen Jin, and Xiang Bai. 2024. On the Hidden Mystery of OCR in Large Multimodal Models. arxiv: 2305.07895 [cs.CV]"},{"key":"e_1_3_2_1_31_1","volume-title":"Classification and regression trees","author":"Loh Wei-Yin","year":"2011","unstructured":"Wei-Yin Loh. 2011. Classification and regression trees. Wiley interdisciplinary reviews: data mining and knowledge discovery, Vol. 1, 1 (2011), 14--23."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.1999.790410"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-020-01359-2"},{"volume-title":"Learning From Documents in the Wild to Improve Document Unwarping","author":"Ma Ke","key":"e_1_3_2_1_34_1","unstructured":"Ke Ma, Sagnik Das, Zhixin Shu, and Dimitris Samaras. 2022. Learning From Documents in the Wild to Improve Document Unwarping. In ACM Special Interest Group on Computer Graphics(SIGGRAPH). 1--9."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00494"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"crossref","unstructured":"Iaroslav Melekhov Aleksei Tiulpin Torsten Sattler Marc Pollefeys Esa Rahtu and Juho Kannala. 2018. DGC-Net: Dense Geometric Correspondence Network. http:\/\/arxiv.org\/abs\/1810.08393 arXiv:1810.08393 [cs].","DOI":"10.1109\/WACV.2019.00115"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01705"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00326"},{"key":"e_1_3_2_1_39_1","unstructured":"Yongxin Shi Dezhi Peng Wenhui Liao Zening Lin Xinhong Chen Chongyu Liu Yuyi Zhang and Lianwen Jin. 2023. Exploring OCR Capabilities of GPT-4V(ision) : A Quantitative and In-depth Evaluation. arxiv: 2310.16809 [cs.CV]"},{"key":"e_1_3_2_1_40_1","volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)."},{"key":"e_1_3_2_1_41_1","volume-title":"Super-convergence: Very fast training of neural networks using large learning rates. arXiv. arXiv preprint arXiv:1708.07120","author":"Smith Leslie N","year":"2017","unstructured":"Leslie N Smith and Nicholay Topin. 2017. Super-convergence: Very fast training of neural networks using large learning rates. arXiv. arXiv preprint arXiv:1708.07120, Vol. 6 (2017)."},{"key":"e_1_3_2_1_42_1","volume-title":"Efficientnet: Rethinking model scaling for convolutional neural networks. PMLR, 6105--6114.","author":"Tan Mingxing","year":"2019","unstructured":"Mingxing Tan and Quoc Le. 2019. Efficientnet: Rethinking model scaling for convolutional neural networks. PMLR, 6105--6114."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00629"},{"key":"e_1_3_2_1_44_1","volume-title":"UVDoc: Neural Grid-based Document Unwarping. In ACM SIGGRAPH Conference and Exhibition on Computer Graphics and Interactive Techniques in Asia(SIGGRAPH ASIA).","author":"Verhoeven Floor","year":"2023","unstructured":"Floor Verhoeven, Tanguy Magne, and Olga Sorkine-Hornung. 2023. UVDoc: Neural Grid-based Document Unwarping. In ACM SIGGRAPH Conference and Exhibition on Computer Graphics and Interactive Techniques in Asia(SIGGRAPH ASIA)."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACSSC.2003.1292216"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01647"},{"key":"e_1_3_2_1_47_1","unstructured":"Guangyang Wu Xiaohong Liu Kunming Luo Xi Liu Qingqing Zheng Shuaicheng Liu Xinyang Jiang Guangtao Zhai and Wenyi Wang. 2023. AccFlow: Backward Accumulation for Long-Range Optical Flow. http:\/\/arxiv.org\/abs\/2308.13133 arXiv:2308.13133 [cs]."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-57058-3_10"},{"key":"e_1_3_2_1_49_1","volume-title":"Document Dewarping with Control Points. In International Conference on Document Analysis and Recognition(ICDAR). 466--480","author":"Xie Guo-Wang","year":"2021","unstructured":"Guo-Wang Xie, Fei Yin, Xu-Yao Zhang, and Cheng-Lin Liu. 2021. Document Dewarping with Control Points. In International Conference on Document Analysis and Recognition(ICDAR). 466--480."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00453"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2675980"},{"key":"e_1_3_2_1_52_1","unstructured":"Jiaxin Zhang Bangdong Chen Hiuyi Cheng Fengjun Guo Kai Ding and Lianwen Jin. 2023. DocAligner: Annotating Real-world Photographic Document Images by Simply Taking Pictures. arXiv:2306.05749 [cs]."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548214"},{"key":"e_1_3_2_1_54_1","volume-title":"Polar-Doc: One-Stage Document Dewarping with Multi-Scope Constraints under Polar Representation. arXiv preprint arXiv:2312.07925","author":"Zhang Weiguang","year":"2023","unstructured":"Weiguang Zhang, Qiufeng Wang, and Kaizhu Huang. 2023. Polar-Doc: One-Stage Document Dewarping with Multi-Scope Constraints under Polar Representation. arXiv preprint arXiv:2312.07925 (2023)."},{"key":"e_1_3_2_1_55_1","unstructured":"Shengjie Zhu and Xiaoming Liu. 2023. PMatch: Paired Masked Image Modeling for Dense Geometric Matching. arXiv:2303.17342 [cs]."}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Melbourne VIC Australia","acronym":"MM '24"},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681548","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681548","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:57:48Z","timestamp":1750294668000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681548"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":55,"alternative-id":["10.1145\/3664647.3681548","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681548","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}