{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T12:43:43Z","timestamp":1740141823109,"version":"3.37.3"},"reference-count":38,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2020,8,10]],"date-time":"2020-08-10T00:00:00Z","timestamp":1597017600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,8,10]],"date-time":"2020-08-10T00:00:00Z","timestamp":1597017600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100001809","name":"the National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["71621002"],"award-info":[{"award-number":["71621002"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"name":"the Key Programs of the Chinese Academy of Sciences","award":["ZDBS-SSW-JSC003","ZDBS-SSW-JSC004"],"award-info":[{"award-number":["ZDBS-SSW-JSC003","ZDBS-SSW-JSC004"]}]},{"name":"the Key Programs of the Chinese Academy of Sciences","award":["ZDBS-SSW-JSC005"],"award-info":[{"award-number":["ZDBS-SSW-JSC005"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["IJDAR"],"published-print":{"date-parts":[[2020,12]]},"DOI":"10.1007\/s10032-020-00358-w","type":"journal-article","created":{"date-parts":[[2020,8,10]],"date-time":"2020-08-10T22:02:44Z","timestamp":1597096964000},"page":"267-277","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["DetectGAN: GAN-based text detector for camera-captured document images"],"prefix":"10.1007","volume":"23","author":[{"given":"Jinyuan","family":"Zhao","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yanna","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Baihua","family":"Xiao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Cunzhao","family":"Shi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fuxi","family":"Jia","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chunheng","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,8,10]]},"reference":[{"key":"358_CR1","doi-asserted-by":"crossref","unstructured":"Chen, F., Carter, S., Denoue, L. et al.: SmartDCap: semi-automatic capture of higher quality document images from a smartphone. In: Proceedings of the 2013 International Conference on Intelligent User Interfaces, pp. 287\u2013296. ACM (2013)","DOI":"10.1145\/2449396.2449433"},{"key":"358_CR2","doi-asserted-by":"crossref","unstructured":"Kumar, J., Bala, R., Ding, H., Emmett, P.: Mobile video capture of multi-page documents. In: Conference on Computer Vision and Pattern Recognition Workshops, p. 3540. IEEE (2013)","DOI":"10.1109\/CVPRW.2013.10"},{"key":"358_CR3","unstructured":"Yao, C., Bai, X., Sang, N., Zhou, X., Zhou, S., Cao, Z.: Scene text detection via holistic, multi-channel prediction. CoRR, arXiv:1606.09002 (2016)"},{"key":"358_CR4","doi-asserted-by":"crossref","unstructured":"Lyu, P., Liao, M., Yao, C., et al.: Mask textspotter: an end-to-end trainable neural network for spotting text with arbitrary shapes. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 67\u201383 (2018)","DOI":"10.1007\/978-3-030-01264-9_5"},{"key":"358_CR5","doi-asserted-by":"crossref","unstructured":"Baek, Y., Lee, B., Han, D., Yun, S., Lee, H.: Character region awareness for text detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 9365\u20139374 (2019)","DOI":"10.1109\/CVPR.2019.00959"},{"issue":"2","key":"358_CR6","first-page":"248256","volume":"34","author":"K Ohta","year":"1993","unstructured":"Ohta, K.: Character segmentation of address reading\/letter sorting machine for the ministry of posts and telecommunications of Japan. NEC Res. Dev. 34(2), 248256 (1993)","journal-title":"NEC Res. Dev."},{"issue":"10","key":"358_CR7","first-page":"10451050","volume":"18","author":"S-W Lee","year":"1996","unstructured":"Lee, S.-W., Lee, D.-J., Park, H.-S.: A new methodology for grayscale character segmentation and recognition. IEEE Trans. Pattern Anal. Mach. Intell. 18(10), 10451050 (1996)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"358_CR8","doi-asserted-by":"crossref","unstructured":"Shivakumara, P., Bhowmick, S., Su, B., Tan, C.L., Pal, U.: A new gradient based character segmentation method for video text recognition. In: International Conference on Document Analysis and Recognition (ICDAR), p. 126130. IEEE (2011)","DOI":"10.1109\/ICDAR.2011.34"},{"issue":"12","key":"358_CR9","doi-asserted-by":"publisher","first-page":"1322","DOI":"10.1109\/34.41371","volume":"11","author":"T Taxt","year":"1989","unstructured":"Taxt, T., Flynn, P.J., Jain, A.K.: Segmentation of document images. IEEE Trans. Pattern Anal. Mach. Intell. 11(12), 1322\u20131329 (1989)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"358_CR10","doi-asserted-by":"crossref","unstructured":"Busta, M., Neumann, L., Fastext, Matas J.: Efficient unconstrained scene text detector. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1206\u20131214 (2015)","DOI":"10.1109\/ICCV.2015.143"},{"issue":"11","key":"358_CR11","doi-asserted-by":"publisher","first-page":"5358","DOI":"10.1109\/TIP.2016.2607418","volume":"25","author":"HI Koo","year":"2016","unstructured":"Koo, H.I.: Text-line detection in camera-captured document images using the state estimation of connected components. IEEE Trans. Image Process. 25(11), 5358\u20135368 (2016)","journal-title":"IEEE Trans. Image Process."},{"issue":"1","key":"358_CR12","first-page":"6266","volume":"19","author":"N Otsu","year":"1979","unstructured":"Otsu, N.: A threshold selection method from gray-level histograms. IEEE Trans. Syst. Man Cybern. 19(1), 6266 (1979)","journal-title":"IEEE Trans. Syst. Man Cybern."},{"key":"358_CR13","unstructured":"Bernsen, J.: Dynamic thresholding of grey-level images. In: ICPR, p. 12511255. IEEE (1986)"},{"key":"358_CR14","volume-title":"An Introduction to Digital Image Processing","author":"W Niblack","year":"1985","unstructured":"Niblack, W.: An Introduction to Digital Image Processing. Strandberg Publishing Company, Copenhagen (1985)"},{"issue":"2","key":"358_CR15","doi-asserted-by":"publisher","first-page":"225236","DOI":"10.1016\/S0031-3203(99)00055-2","volume":"33","author":"J Sauvola","year":"2000","unstructured":"Sauvola, J., Pietikainen, M.: Adaptive document image binarization. Pattern Recogn. 33(2), 225236 (2000)","journal-title":"Pattern Recogn."},{"key":"358_CR16","doi-asserted-by":"crossref","unstructured":"Liu, W., Anguelov, D., Erhan, D., Szegedy, C., Reed, S.E., Fu, C., Berg, A.C.: SSD: single shot multibox detector. In: Computer Vision\u2014ECCV 2016\u201414th European Conference, Amsterdam, The Netherlands, 11\u201314 Oct 2016, Proceedings. Part I, p. 2137 (2016)","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"358_CR17","doi-asserted-by":"crossref","unstructured":"Redmon, J., Divvala, S.K., Girshick, R.B., Farhadi, A.: You only look once: unified, real-time object detection. In: 2016 IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2016, Las Vegas, NV, USA, 27\u201330 June 2016, p. 779788 (2016)","DOI":"10.1109\/CVPR.2016.91"},{"key":"358_CR18","unstructured":"Huang, L., Yang, Y., Deng, Y., Yu, Y.: Densebox: unifying landmark localization with end to end object detection. CoRR, arXiv:1509.04874 (2015)"},{"key":"358_CR19","doi-asserted-by":"crossref","unstructured":"Liao, M., Shi, B., Bai, X., Wang, X., Liu, W.: Textboxes: a fast text detector with a single deep neural network. In: Proceedings of the Thirty-First AAAI Conference on Artificial Intelligence, 4\u20139 Feb 2017, San Francisco, California, USA, p. 41614167 (2017)","DOI":"10.1609\/aaai.v31i1.11196"},{"issue":"8","key":"358_CR20","doi-asserted-by":"publisher","first-page":"3676","DOI":"10.1109\/TIP.2018.2825107","volume":"27","author":"M Liao","year":"2018","unstructured":"Liao, M., Shi, B., Xiang, B.: TextBoxes++: a single-shot oriented scene text detector. IEEE Trans. Image Process. 27(8), 3676\u20133690 (2018)","journal-title":"IEEE Trans. Image Process."},{"issue":"99","key":"358_CR21","first-page":"2999","volume":"PP","author":"TY Lin","year":"2017","unstructured":"Lin, T.Y., Goyal, P., Girshick, R., et al.: Focal loss for dense object detection. IEEE Trans. Pattern Anal. Mach. Intell. PP(99), 2999\u20133007 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"358_CR22","doi-asserted-by":"crossref","unstructured":"Liao, M., Zhu, Z., Shi, B., et al.: Rotation-sensitive regression for oriented scene text detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5909\u20135918 (2018)","DOI":"10.1109\/CVPR.2018.00619"},{"key":"358_CR23","doi-asserted-by":"crossref","unstructured":"Shi, B., Bai, X., Belongie, S.: Detecting oriented text in natural images by linking segments. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2550\u20132558 (2017)","DOI":"10.1109\/CVPR.2017.371"},{"key":"358_CR24","doi-asserted-by":"crossref","unstructured":"Shi, B., Bai, X., Belongie, S.: Detecting oriented text in natural images by linking segments. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2550\u20132558 (2017)","DOI":"10.1109\/CVPR.2017.371"},{"key":"358_CR25","doi-asserted-by":"crossref","unstructured":"Li, X., Wang, W., Hou, W., Liu, R. Z., Lu, T., Yang, J.: Shape robust text detection with progressive scale expansion network (2018)","DOI":"10.1109\/CVPR.2019.00956"},{"key":"358_CR26","doi-asserted-by":"crossref","unstructured":"Lyu, P., Yao., C, Wu, W., et al.: Multi-oriented scene text detection via corner localization and region segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7553\u20137563 (2018)","DOI":"10.1109\/CVPR.2018.00788"},{"key":"358_CR27","doi-asserted-by":"crossref","unstructured":"Gupta, A., Vedaldi, A., Zisserman, A.: Synthetic data for text localisation in natural images. In: 2016 IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2016, Las Vegas, NV, USA, 27\u201330 June 2016, p. 23152324 (2016)","DOI":"10.1109\/CVPR.2016.254"},{"key":"358_CR28","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Zhang, C., Shen, W., Yao, C., Liu, W., Bai, X.: Multi-oriented text detection with fully convolutional networks. In: 2016 IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2016, Las Vegas, NV, USA, 27\u201330 June 2016, p. 41594167 (2016)","DOI":"10.1109\/CVPR.2016.451"},{"key":"358_CR29","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational Bayes. arXiv preprint arXiv:1312.6114 (2013)"},{"key":"358_CR30","unstructured":"Rezende, D.J., Mohamed, S., Wierstra, D.: Stochastic backpropagation and approximate inference in deep generative models. In: International Conference on Machine Learning, pp. 1278\u20131286 (2014)"},{"key":"358_CR31","unstructured":"Oord, A.V.D., Kalchbrenner, N., Kavukcuoglu, K.: Pixel recurrent neural networks. In: International Conference on Machine Learning, pp. 1747\u20131756 (2016)"},{"key":"358_CR32","doi-asserted-by":"crossref","unstructured":"Ronneberger, O., Fischer, P., Brox, T.: U-net: Convolutional networks for biomedical image segmentation. In: International Conference on Medical Image Computing and Computer-Assisted Intervention, vol. 9351, pp. 234\u2013241 (2015)","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"358_CR33","doi-asserted-by":"crossref","unstructured":"Long, J., Shelhamer, E., Darrell, T.: Fully convolutional networks for semantic segmentation. In: IEEE Conference on Computer Vision and Pattern Recognition, pp. 3431\u20133440 (2015)","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"358_CR34","doi-asserted-by":"crossref","unstructured":"Long, J., Shelhamer, E., Darrell, T.: Fully convolutional networks for semantic segmentation. In: IEEE Conference on Computer Vision and Pattern Recognition, pp. 3431\u20133440 (2015)","DOI":"10.1109\/CVPR.2015.7298965"},{"issue":"(Complete)","key":"358_CR35","doi-asserted-by":"publisher","first-page":"348","DOI":"10.1016\/j.patcog.2016.08.005","volume":"61","author":"XY Zhang","year":"2017","unstructured":"Zhang, X.Y., Bengio, Y., Liu, C.L.: Online and offline handwritten Chinese character recognition: a comprehensive study and new benchmark. Pattern Recogn. 61((Complete)), 348\u2013360 (2017)","journal-title":"Pattern Recogn."},{"key":"358_CR36","unstructured":"Goodfellow, I.J., Pouget-Abadie, J., Mirza, M., Xu, B., Warde-Farley, D., Ozair, S., Courville, A., Bengio, Y.: Generative adversarial nets. In: International Conference on Neural Information Processing Systems, pp. 2672\u20132680 (2014)"},{"key":"358_CR37","unstructured":"Denton, E., Chintala, S., Szlam, A., Fergus, R.: Deep generative image models using a Laplacian pyramid of adversarial networks. In: International Conference on Neural Information Processing Systems, pp. 1486\u20131494 (2015)"},{"key":"358_CR38","unstructured":"Reed, S., Akata, Z., Yan, X., Logeswaran, L., Schiele, B., Lee, H.: Generative adversarial text to image synthesis. In: International Conference on International Conference on. Machine Learning, pp. 1060\u20131069 (2016)"}],"container-title":["International Journal on Document Analysis and Recognition (IJDAR)"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10032-020-00358-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10032-020-00358-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10032-020-00358-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,6]],"date-time":"2022-11-06T12:28:11Z","timestamp":1667737691000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10032-020-00358-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,8,10]]},"references-count":38,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2020,12]]}},"alternative-id":["358"],"URL":"https:\/\/doi.org\/10.1007\/s10032-020-00358-w","relation":{},"ISSN":["1433-2833","1433-2825"],"issn-type":[{"type":"print","value":"1433-2833"},{"type":"electronic","value":"1433-2825"}],"subject":[],"published":{"date-parts":[[2020,8,10]]},"assertion":[{"value":"1 December 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 June 2020","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 July 2020","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 August 2020","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}