{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,30]],"date-time":"2026-04-30T16:49:43Z","timestamp":1777567783344,"version":"3.51.4"},"reference-count":78,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2025,5,26]],"date-time":"2025-05-26T00:00:00Z","timestamp":1748217600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,5,26]],"date-time":"2025-05-26T00:00:00Z","timestamp":1748217600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2025,8]]},"DOI":"10.1007\/s11263-025-02431-5","type":"journal-article","created":{"date-parts":[[2025,5,26]],"date-time":"2025-05-26T02:48:19Z","timestamp":1748227699000},"page":"5343-5362","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["DocScanner: Robust Document Image Rectification with Progressive Learning"],"prefix":"10.1007","volume":"133","author":[{"given":"Hao","family":"Feng","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wengang","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiajun","family":"Deng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qi","family":"Tian","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2188-3028","authenticated-orcid":false,"given":"Houqiang","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,5,26]]},"reference":[{"issue":"2","key":"2431_CR1","doi-asserted-by":"publisher","first-page":"157","DOI":"10.1117\/1.1455013","volume":"11","author":"I Amidror","year":"2002","unstructured":"Amidror, I. (2002). Scattered data interpolation methods for electronic imaging systems: A survey. Journal of Electronic Imaging, 11(2), 157\u2013176.","journal-title":"Journal of Electronic Imaging"},{"issue":"11","key":"2431_CR2","doi-asserted-by":"publisher","first-page":"1222","DOI":"10.1109\/34.969114","volume":"23","author":"Y Boykov","year":"2001","unstructured":"Boykov, Y., Veksler, O., & Zabih, R. (2001). Fast approximate energy minimization via graph cuts. IEEE Transactions on Pattern Analysis and Machine Intelligence, 23(11), 1222\u20131239.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"2431_CR3","first-page":"367","volume":"2","author":"MS Brown","year":"2001","unstructured":"Brown, M. S., & Seales, W. B. (2001). Document restoration using 3D shape: A general deskewing algorithm for arbitrarily warped documents. Proceedings of the IEEE International Conference on Computer Vision, 2, 367\u2013374.","journal-title":"Proceedings of the IEEE International Conference on Computer Vision"},{"issue":"10","key":"2431_CR4","doi-asserted-by":"publisher","first-page":"1295","DOI":"10.1109\/TPAMI.2004.87","volume":"26","author":"MS Brown","year":"2004","unstructured":"Brown, M. S., & Seales, W. B. (2004). Image restoration of arbitrarily warped documents. IEEE Transactions on Pattern Analysis and Machine Intelligence, 26(10), 1295\u20131306.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"11","key":"2431_CR5","doi-asserted-by":"publisher","first-page":"1904","DOI":"10.1109\/TPAMI.2007.1118","volume":"29","author":"MS Brown","year":"2007","unstructured":"Brown, M. S., Sun, M., Yang, R., Yun, L., & Seales, W. B. (2007). Restoring 2D content from distorted documents. IEEE Transactions on Pattern Analysis and Machine Intelligence, 29(11), 1904\u20131916.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"6","key":"2431_CR6","doi-asserted-by":"publisher","first-page":"1544","DOI":"10.1109\/TIP.2006.871082","volume":"15","author":"MS Brown","year":"2006","unstructured":"Brown, M. S., & Tsoi, Y. C. (2006). Geometric and shading correction for images of printed materials using boundary. IEEE Transactions on Image Processing, 15(6), 1544\u20131554.","journal-title":"IEEE Transactions on Image Processing"},{"key":"2431_CR7","doi-asserted-by":"crossref","unstructured":"Cho, K., Van Merri\u00ebnboer, B., Bahdanau, D., & Bengio, Y. (2014). On the properties of neural machine translation: Encoder\u2013decoder approaches. arXiv preprint arXiv:1409.1259.","DOI":"10.3115\/v1\/W14-4012"},{"key":"2431_CR8","doi-asserted-by":"crossref","unstructured":"Cimpoi, M., Maji, S., Kokkinos, I., Mohamed, S., & Vedaldi, A. (2014). Describing textures in the wild. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 3606\u20133613).","DOI":"10.1109\/CVPR.2014.461"},{"issue":"5","key":"2431_CR9","doi-asserted-by":"publisher","first-page":"301","DOI":"10.1007\/s00138-006-0062-y","volume":"18","author":"F Courteille","year":"2007","unstructured":"Courteille, F., Crouzil, A., Durou, J. D., & Gurdjos, P. (2007). Shape from shading for the digitization of curved documents. Machine Vision and Applications, 18(5), 301\u2013316.","journal-title":"Machine Vision and Applications"},{"key":"2431_CR10","doi-asserted-by":"crossref","unstructured":"Das, S., Ma, K., Shu, Z., Samaras, D., & Shilkrot, R. (2019). DewarpNet: Single-image document unwarping with stacked 3D and 2D regression networks. In Proceedings of the international conference on computer vision (pp. 131\u2013140).","DOI":"10.1109\/ICCV.2019.00022"},{"key":"2431_CR11","doi-asserted-by":"crossref","unstructured":"Das, S., Sial, H. M., Baldrich, R., Vanrell, M., & Samaras, D. (2020). Intrinsic decomposition of document images in-the-wild. In Proceedings of the British machine vision conference.","DOI":"10.5244\/C.34.188"},{"key":"2431_CR12","doi-asserted-by":"crossref","unstructured":"Das, S., Singh, K. Y., Wu, J., Bas, E., Mahadevan, V., Bhotika, R., & Samaras, D. (2021). End-to-end piece-wise unwarping of document images. In Proceedings of the IEEE international conference on computer vision (pp. 4268\u20134277).","DOI":"10.1109\/ICCV48922.2021.00423"},{"issue":"1","key":"2431_CR13","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1007\/s10479-005-5724-z","volume":"134","author":"PT De Boer","year":"2005","unstructured":"De Boer, P. T., Kroese, D. P., Mannor, S., & Rubinstein, R. Y. (2005). A tutorial on the cross-entropy method. Annals of Operations Research, 134(1), 19\u201367.","journal-title":"Annals of Operations Research"},{"key":"2431_CR14","doi-asserted-by":"crossref","unstructured":"Feng, H., Wang, Y., Zhou, W., Deng, J., & Li, H. (2021). DocTr: Document image transformer for geometric unwarping and illumination correction. In Proceedings of the ACM international conference on multimedia (pp. 273\u2013281).","DOI":"10.1145\/3474085.3475388"},{"key":"2431_CR15","doi-asserted-by":"crossref","unstructured":"Feng, H., Zhou, W., Deng, J., Wang, Y., & Li, H. (2022). Geometric representation learning for document image rectification. In Proceedings of the European conference on computer vision.","DOI":"10.1007\/978-3-031-19836-6_27"},{"key":"2431_CR16","doi-asserted-by":"crossref","unstructured":"Garai, A., Biswas, S., & Mandal, S. (2021). A theoretical justification of warping generation for dewarping using CNN. Pattern Recognition, 109, 107621.","DOI":"10.1016\/j.patcog.2020.107621"},{"issue":"1","key":"2431_CR17","doi-asserted-by":"publisher","first-page":"74","DOI":"10.1049\/iet-ipr.2019.0831","volume":"14","author":"A Garai","year":"2020","unstructured":"Garai, A., Biswas, S., Mandal, S., & Chaudhuri, B. B. (2020). Automatic rectification of warped Bangla document images. IET Image Processing, 14(1), 74\u201383.","journal-title":"IET Image Processing"},{"issue":"1","key":"2431_CR18","doi-asserted-by":"publisher","first-page":"1537","DOI":"10.1007\/s11042-022-13234-y","volume":"82","author":"A Garai","year":"2023","unstructured":"Garai, A., Dutta, A., & Biswas, S. (2023). Automatic dewarping of camera-captured comic document images. Multimedia Tools and Applications, 82(1), 1537\u20131552.","journal-title":"Multimedia Tools and Applications"},{"key":"2431_CR19","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep residual learning for image recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 770\u2013778).","DOI":"10.1109\/CVPR.2016.90"},{"key":"2431_CR20","doi-asserted-by":"crossref","unstructured":"He, Y., Pan, P., Xie, S., Sun, J., & Naoi, S. (2013). A book dewarping system by boundary-based 3D surface reconstruction. In Proceedings of the international conference on document analysis and recognition (pp. 403\u2013407).","DOI":"10.1109\/ICDAR.2013.88"},{"issue":"8","key":"2431_CR21","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., & Schmidhuber, J. (1997). Long short-term memory. Neural Computation, 9(8), 1735\u20131780.","journal-title":"Neural Computation"},{"key":"2431_CR22","unstructured":"Jaderberg, M., Simonyan, K., Zisserman, A., et al. (2015). Spatial transformer networks. In Proceedings of the neural information processing systems (pp. 2017\u20132025)."},{"key":"2431_CR23","doi-asserted-by":"crossref","unstructured":"Jiang, X., Long, R., Xue, N., Yang, Z., Yao, C., & Xia, G. S. (2022). Revisiting document image dewarping by grid regularization. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 4543\u20134552).","DOI":"10.1109\/CVPR52688.2022.00450"},{"key":"2431_CR24","doi-asserted-by":"crossref","unstructured":"Kil, T., Seo, W., Koo, H. I., & Cho, N. I. (2017). Robust document image dewarping method using text-lines and line segments. In Proceedings of the international conference on document analysis and recognition (Vol. 1, pp. 865\u2013870).","DOI":"10.1109\/ICDAR.2017.146"},{"issue":"11","key":"2431_CR25","doi-asserted-by":"publisher","first-page":"3600","DOI":"10.1016\/j.patcog.2015.04.026","volume":"48","author":"BS Kim","year":"2015","unstructured":"Kim, B. S., Koo, H. I., & Cho, N. I. (2015). Document dewarping via text-line based optimization. Pattern Recognition, 48(11), 3600\u20133614.","journal-title":"Pattern Recognition"},{"key":"2431_CR26","doi-asserted-by":"crossref","unstructured":"Kim, G., Hong, T., Yim, M., Nam, J., Park, J., Yim, J., Hwang, W., Yun, S., Han, D., & Park, S. (2022). OCR-free document understanding transformer. In Proceedings of the European conference on computer vision (pp. 498\u2013517).","DOI":"10.1007\/978-3-031-19815-1_29"},{"key":"2431_CR27","unstructured":"Kingma, D. P., & Ba, J. (2014). Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980."},{"key":"2431_CR28","doi-asserted-by":"crossref","unstructured":"Koo, H. I., & Cho, N. I. (2010). State estimation in a document image and its application in text block identification and text line extraction. In Proceedings of the European conference on computer vision (pp. 421\u2013434).","DOI":"10.1007\/978-3-642-15552-9_31"},{"issue":"7","key":"2431_CR29","doi-asserted-by":"publisher","first-page":"1551","DOI":"10.1109\/TIP.2009.2019301","volume":"18","author":"HI Koo","year":"2009","unstructured":"Koo, H. I., Kim, J., & Cho, N. I. (2009). Composition of a dewarped and enhanced document image from two view images. IEEE Transactions on Image Processing, 18(7), 1551\u20131562.","journal-title":"IEEE Transactions on Image Processing"},{"key":"2431_CR30","first-page":"748","volume":"3","author":"O Lavialle","year":"2001","unstructured":"Lavialle, O., Molines, X., Angella, F., & Baylou, P. (2001). Active contours network to straighten distorted text lines. Proceedings of the International Conference on Image Processing, 3, 748\u2013751.","journal-title":"Proceedings of the International Conference on Image Processing"},{"key":"2431_CR31","first-page":"707","volume":"10","author":"VI Levenshtein","year":"1966","unstructured":"Levenshtein, V. I. (1966). Binary codes capable of correcting deletions, insertions, and reversals. Soviet Physics Doklady, 10, 707\u2013710.","journal-title":"Soviet Physics Doklady"},{"issue":"3","key":"2431_CR32","doi-asserted-by":"publisher","first-page":"362","DOI":"10.1145\/566654.566590","volume":"21","author":"B L\u00e9vy","year":"2002","unstructured":"L\u00e9vy, B., Petitjean, S., Ray, N., & Maillot, J. (2002). Least squares conformal maps for automatic texture atlas generation. ACM Transactions on Graphics, 21(3), 362\u2013371.","journal-title":"ACM Transactions on Graphics"},{"issue":"6","key":"2431_CR33","first-page":"1","volume":"38","author":"X Li","year":"2019","unstructured":"Li, X., Zhang, B., Liao, J., & Sander, P. V. (2019). Document rectification and illumination correction using a patch-based CNN. ACM Transactions on Graphics, 38(6), 1\u201311.","journal-title":"ACM Transactions on Graphics"},{"issue":"4","key":"2431_CR34","doi-asserted-by":"publisher","first-page":"591","DOI":"10.1109\/TPAMI.2007.70724","volume":"30","author":"J Liang","year":"2008","unstructured":"Liang, J., DeMenthon, D., & Doermann, D. (2008). Geometric rectification of camera-captured document images. IEEE Transactions on Pattern Analysis and Machine Intelligence, 30(4), 591\u2013605.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"5","key":"2431_CR35","doi-asserted-by":"publisher","first-page":"978","DOI":"10.1109\/TPAMI.2010.147","volume":"33","author":"C Liu","year":"2011","unstructured":"Liu, C., Yuen, J., & Torralba, A. (2011). SIFT flow: Dense correspondence across scenes and its applications. IEEE Transactions on Pattern Analysis and Machine Intelligence, 33(5), 978\u2013994.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"2431_CR36","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2020.107576","volume":"108","author":"X Liu","year":"2020","unstructured":"Liu, X., Meng, G., Fan, B., Xiang, S., & Pan, C. (2020). Geometric rectification of document images using adversarial gated unwarping network. Pattern Recognition, 108, Article 107576.","journal-title":"Pattern Recognition"},{"key":"2431_CR37","doi-asserted-by":"crossref","unstructured":"Long, J., Shelhamer, E., & Darrell, T. (2015). Fully convolutional networks for semantic segmentation. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 3431\u20133440).","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"2431_CR38","unstructured":"Loshchilov, I., & Hutter, F. (2017). Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101."},{"issue":"2","key":"2431_CR39","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1023\/B:VISI.0000029664.99615.94","volume":"60","author":"DG Lowe","year":"2004","unstructured":"Lowe, D. G. (2004). Distinctive image features from scale-invariant keypoints. International Journal of Computer Vision, 60(2), 91\u2013110.","journal-title":"International Journal of Computer Vision"},{"key":"2431_CR40","doi-asserted-by":"crossref","unstructured":"Ma, K., Das, S., Shu, Z., & Samaras, D. (2022). Learning from documents in the wild to improve document unwarping. In Proceedings of the ACM SIGGRAPH conference (pp. 1\u20139).","DOI":"10.1145\/3528233.3530756"},{"key":"2431_CR41","doi-asserted-by":"crossref","unstructured":"Ma, K., Shu, Z., Bai, X., Wang, J., & Samaras, D. (2018). DocUNet: Document image unwarping via a stacked U-Net. In Proceedings of the IEEE international conference on computer vision (pp. 4700\u20134709).","DOI":"10.1109\/CVPR.2018.00494"},{"key":"2431_CR42","doi-asserted-by":"crossref","unstructured":"Markovitz, A., Lavi, I., Perel, O., Mazor, S., & Litman, R. (2020). Can you read me now? Content aware rectification using angle supervision. In Proceedings of the European conference on computer vision (pp. 208\u2013223).","DOI":"10.1007\/978-3-030-58610-2_13"},{"key":"2431_CR43","doi-asserted-by":"crossref","unstructured":"Mathew, M., Karatzas, D., & Jawahar, C. (2021). DocVQA: A dataset for VQA on document images. In Proceedings of the IEEE winter conference on applications of computer vision (pp. 2200\u20132209).","DOI":"10.1109\/WACV48630.2021.00225"},{"issue":"4","key":"2431_CR44","doi-asserted-by":"publisher","first-page":"707","DOI":"10.1109\/TPAMI.2011.151","volume":"34","author":"G Meng","year":"2011","unstructured":"Meng, G., Pan, C., Xiang, S., Duan, J., & Zheng, N. (2011). Metric rectification of curved document images. IEEE Transactions on Pattern Analysis and Machine Intelligence, 34(4), 707\u2013722.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"2431_CR45","doi-asserted-by":"crossref","unstructured":"Meng, G., Su, Y., Wu, Y., Xiang, S., & Pan, C. (2018). Exploiting vector fields for geometric rectification of distorted document images. In Proceedings of the European conference on computer vision (pp. 172\u2013187).","DOI":"10.1007\/978-3-030-01270-0_11"},{"key":"2431_CR46","doi-asserted-by":"crossref","unstructured":"Meng, G., Wang, Y., Qu, S., Xiang, S., & Pan, C. (2014). Active flattening of curved document images via two structured beams. In Proceedings of the IEEE international conference on computer vision (pp. 3890\u20133897).","DOI":"10.1109\/CVPR.2014.497"},{"issue":"1","key":"2431_CR47","doi-asserted-by":"publisher","first-page":"34","DOI":"10.1007\/s11263-016-0952-z","volume":"122","author":"G Meng","year":"2017","unstructured":"Meng, G., Xiang, S., Pan, C., & Zheng, N. (2017). Active rectification of curved document images using structured beams. International Journal of Computer Vision, 122(1), 34\u201360.","journal-title":"International Journal of Computer Vision"},{"key":"2431_CR48","doi-asserted-by":"crossref","unstructured":"Mischke, L., & Luther, W. (2005). Document image de-warping based on detection of distorted text lines. In Proceedings of the international conference on image analysis and processing (pp. 1068\u20131075).","DOI":"10.1007\/11553595_131"},{"key":"2431_CR49","doi-asserted-by":"crossref","unstructured":"Morris, A. C., Maier, V., & Green, P. (2004). From WER and RIL to MER and WIL: Improved evaluation measures for connected speech recognition. In Proceedings of the international conference on spoken language processing.","DOI":"10.21437\/Interspeech.2004-668"},{"key":"2431_CR50","unstructured":"Paszke, A., Gross, S., Chintala, S., Chanan, G., Yang, E., DeVito, Z., Lin, Z., Desmaison, A., Antiga, L., & Lerer, A. (2017). Automatic differentiation in PyTorch."},{"issue":"11","key":"2431_CR51","doi-asserted-by":"publisher","first-page":"2623","DOI":"10.1007\/s11263-022-01654-0","volume":"130","author":"D Peng","year":"2022","unstructured":"Peng, D., Jin, L., Liu, Y., Luo, C., & Lai, S. (2022). PageNet: Towards end-to-end weakly supervised page-level handwritten Chinese text recognition. International Journal of Computer Vision, 130(11), 2623\u20132645.","journal-title":"International Journal of Computer Vision"},{"key":"2431_CR52","doi-asserted-by":"publisher","first-page":"107404","DOI":"10.1016\/j.patcog.2020.107404","volume":"106","author":"X Qin","year":"2020","unstructured":"Qin, X., Zhang, Z., Huang, C., Dehghan, M., Zaiane, O. R., & Jagersand, M. (2020). U2-Net: Going deeper with nested u-structure for salient object detection. Pattern Recognition, 106, 107404.","journal-title":"Pattern Recognition"},{"key":"2431_CR53","doi-asserted-by":"crossref","unstructured":"Ronneberger, O., Fischer, P., & Brox, T. (2015). U-Net: Convolutional networks for biomedical image segmentation. In Proceedings of the international conference on medical image computing and computer-assisted intervention (pp. 234\u2013241).","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"2431_CR54","doi-asserted-by":"crossref","unstructured":"Smith, R. (2007). An overview of the tesseract OCR engine. In Proceedings of the international conference on document analysis and recognition (Vol. 2, pp. 629\u2013633).","DOI":"10.1109\/ICDAR.2007.4376991"},{"issue":"2","key":"2431_CR55","doi-asserted-by":"publisher","first-page":"195","DOI":"10.1109\/TPAMI.2006.40","volume":"28","author":"CL Tan","year":"2006","unstructured":"Tan, C. L., Zhang, L., Zhang, Z., & Xia, T. (2006). Restoring warped document images through 3D shape modeling. IEEE Transactions on Pattern Analysis and Machine Intelligence, 28(2), 195\u2013208.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"2431_CR56","doi-asserted-by":"crossref","unstructured":"Teed, Z., & Deng, J. (2020). RAFT: Recurrent all-pairs field transforms for optical flow. In Proceedings of the European conference on computer vision (pp. 402\u2013419).","DOI":"10.1007\/978-3-030-58536-5_24"},{"key":"2431_CR57","doi-asserted-by":"crossref","unstructured":"Tian, Y., & Narasimhan, S. G. (2011). Rectification and 3D reconstruction of curved document images. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 377\u2013384).","DOI":"10.1109\/CVPR.2011.5995540"},{"key":"2431_CR58","doi-asserted-by":"crossref","unstructured":"Tokmakov, P., Alahari, K., & Schmid, C. (2017). Learning video object segmentation with visual memory. In Proceedings of the IEEE international conference on computer vision (pp. 4481\u20134490).","DOI":"10.1109\/ICCV.2017.480"},{"key":"2431_CR59","doi-asserted-by":"crossref","unstructured":"Tsoi, Y. C., & Brown, M. S. (2007). Multi-view document rectification using boundary. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 1\u20138.","DOI":"10.1109\/CVPR.2007.383251"},{"key":"2431_CR60","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A. N., Kaiser, L., & Polosukhin, I. (2017). Attention is all you need. In Proceedings of the neural information processing systems (pp. 6000\u20136010)."},{"issue":"2","key":"2431_CR61","doi-asserted-by":"publisher","first-page":"125","DOI":"10.1023\/A:1007906904009","volume":"24","author":"T Wada","year":"1997","unstructured":"Wada, T., Ukida, H., & Matsuyama, T. (1997). Shape from shading with interreflections under a proximal light source: Distortion-free copying of an unfolded book. International Journal of Computer Vision, 24(2), 125\u2013135.","journal-title":"International Journal of Computer Vision"},{"issue":"4","key":"2431_CR62","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TIP.2003.819861","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P. (2004). Image quality assessment: From error visibility to structural similarity. IEEE Transactions on Image Processing, 13(4), 600\u2013612.","journal-title":"IEEE Transactions on Image Processing"},{"key":"2431_CR63","doi-asserted-by":"crossref","unstructured":"Wang, Z., Simoncelli, E. P., & Bovik, A. C. (2003). Multiscale structural similarity for image quality assessment. In Proceedings of the Asilomar conference on signals, systems computers (Vol. 2, pp. 1398\u20131402).","DOI":"10.1109\/ACSSC.2003.1292216"},{"key":"2431_CR64","doi-asserted-by":"crossref","unstructured":"Wu, C., & Agam, G. (2002). Document image de-warping for text\/graphics recognition. In Proceedings of the joint IAPR international workshops on statistical techniques in pattern recognition and structural and syntactic pattern recognition (pp. 348\u2013357).","DOI":"10.1007\/3-540-70659-3_36"},{"key":"2431_CR65","doi-asserted-by":"crossref","unstructured":"Xie, G., Yin, F., Zhang, X., & Liu, C. (2020). Dewarping document image by displacement flow estimation with fully convolutional network. In Proceedings of the international workshop on document analysis systems (pp. 131\u2013144).","DOI":"10.1007\/978-3-030-57058-3_10"},{"key":"2431_CR66","doi-asserted-by":"crossref","unstructured":"Xie, G. W., Yin, F., Zhang, X. Y., & Liu, C. L. (2021). Document dewarping with control points. In Proceedings of the international conference on document analysis and recognition (pp. 466\u2013480).","DOI":"10.1007\/978-3-030-86549-8_30"},{"key":"2431_CR67","doi-asserted-by":"crossref","unstructured":"Xue, C., Tian, Z., Zhan, F., Lu, S., & Bai, S. (2022). Fourier document restoration for robust document dewarping and recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 4573\u20134582).","DOI":"10.1109\/CVPR52688.2022.00453"},{"key":"2431_CR68","doi-asserted-by":"crossref","unstructured":"Yamashita, A., Kawarago, A., Kaneko, T., & Miura, K. T. (2004). Shape reconstruction and image restoration for non-flat surfaces of documents with a stereo vision system. In Proceedings of the international conference on pattern recognition (Vol. 1, pp. 482\u2013485).","DOI":"10.1109\/ICPR.2004.1334171"},{"key":"2431_CR69","doi-asserted-by":"crossref","unstructured":"Yang, S., Lin, C., Liao, K., Zhang, C., & Zhao, Y. (2021). Progressively complementary network for fisheye image rectification using appearance flow. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 6348\u20136357).","DOI":"10.1109\/CVPR46437.2021.00628"},{"issue":"2","key":"2431_CR70","doi-asserted-by":"publisher","first-page":"505","DOI":"10.1109\/TPAMI.2017.2675980","volume":"40","author":"S You","year":"2018","unstructured":"You, S., Matsushita, Y., Sinha, S., Bou, Y., & Ikeuchi, K. (2018). Multiview rectification of folded documents. IEEE Transactions on Pattern Analysis and Machine Intelligence, 40(2), 505\u2013511.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"2431_CR71","doi-asserted-by":"crossref","unstructured":"Yuan, Y., Liu, X., Dikubab, W., Liu, H., Ji, Z., Wu, Z., & Bai, X. (2022). Syntax-aware network for handwritten mathematical expression recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 4553\u20134562).","DOI":"10.1109\/CVPR52688.2022.00451"},{"key":"2431_CR72","doi-asserted-by":"crossref","unstructured":"Zamir, S. W., Arora, A., Khan, S., Hayat, M., Khan, F. S., Yang, M. H., & Shao, L. (2021). Multi-stage progressive image restoration. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 14821\u201314831).","DOI":"10.1109\/CVPR46437.2021.01458"},{"key":"2431_CR73","doi-asserted-by":"crossref","unstructured":"Zandifar, A. (2007). Unwarping scanned image of Japanese\/English documents. In Proceedings of the international conference on image analysis and processing (pp. 129\u2013136).","DOI":"10.1109\/ICIAP.2007.4362769"},{"key":"2431_CR74","doi-asserted-by":"crossref","unstructured":"Zhang, J., Luo, C., Jin, L., Guo, F., & Ding, K. (2022). Marior: Margin removal and iterative content rectification for document dewarping in the wild. In Proceedings of the ACM international conference on multimedia (pp. 2805\u20132815).","DOI":"10.1145\/3503161.3548214"},{"issue":"11","key":"2431_CR75","doi-asserted-by":"publisher","first-page":"2961","DOI":"10.1016\/j.patcog.2009.03.025","volume":"42","author":"L Zhang","year":"2009","unstructured":"Zhang, L., Yip, A. M., Brown, M. S., & Tan, C. L. (2009). A unified framework for document restoration using inpainting and shape-from-shading. Pattern Recognition, 42(11), 2961\u20132978.","journal-title":"Pattern Recognition"},{"key":"2431_CR76","doi-asserted-by":"crossref","unstructured":"Zhang, L., Zhang, Y., & Tan, C. (2008). An improved physically-based method for geometric restoration of distorted document images. IEEE Transactions on Pattern Analysis and Machine Intelligence, 30(4), 728\u2013734.","DOI":"10.1109\/TPAMI.2007.70831"},{"key":"2431_CR77","doi-asserted-by":"crossref","unstructured":"Zhong, X., Tang, J., & Yepes, A. J. (2019). PubLayNet: Largest dataset ever for document layout analysis. In Proceedings of the international conference on document analysis and recognition (pp. 1015\u20131022).","DOI":"10.1109\/ICDAR.2019.00166"},{"key":"2431_CR78","doi-asserted-by":"crossref","unstructured":"Zhou, Z., Fan, X., Shi, P., & Xin, Y. (2021). R-MSFM: Recurrent multi-scale feature modulation for monocular depth estimating. In Proceedings of the IEEE international conference on computer vision (pp. 12777\u201312786).","DOI":"10.1109\/ICCV48922.2021.01254"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02431-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-025-02431-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02431-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,6]],"date-time":"2025-09-06T16:15:27Z","timestamp":1757175327000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-025-02431-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,26]]},"references-count":78,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2025,8]]}},"alternative-id":["2431"],"URL":"https:\/\/doi.org\/10.1007\/s11263-025-02431-5","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,5,26]]},"assertion":[{"value":"22 December 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 March 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 May 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}