{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T08:34:05Z","timestamp":1743150845290,"version":"3.40.3"},"publisher-location":"Cham","reference-count":48,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031416811"},{"type":"electronic","value":"9783031416828"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-41682-8_19","type":"book-chapter","created":{"date-parts":[[2023,8,18]],"date-time":"2023-08-18T07:02:59Z","timestamp":1692342179000},"page":"297-313","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["On Web-based Visual Corpus Construction for\u00a0Visual Document Understanding"],"prefix":"10.1007","author":[{"given":"Donghyun","family":"Kim","sequence":"first","affiliation":[]},{"given":"Teakgyu","family":"Hong","sequence":"additional","affiliation":[]},{"given":"Moonbin","family":"Yim","sequence":"additional","affiliation":[]},{"given":"Yoonsik","family":"Kim","sequence":"additional","affiliation":[]},{"given":"Geewook","family":"Kim","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,8,19]]},"reference":[{"key":"19_CR1","unstructured":"Attardi, G.: WikiExtractor. 
https:\/\/github.com\/attardi\/wikiextractor (2015)"},{"key":"19_CR2","doi-asserted-by":"crossref","unstructured":"Biten, A.F., Tito, R., Gomez, L., Valveny, E., Karatzas, D.: Ocr-idl: ocr annotations for industry document library dataset. In: European Conference on Computer Vision Workshop (ECCV Workshop) (2022)","DOI":"10.1007\/978-3-031-25069-9_16"},{"key":"19_CR3","doi-asserted-by":"crossref","unstructured":"Chi, Z., et al.: InfoXLM: an information-theoretic framework for cross-lingual language model pre-training. In: Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL) (2021)","DOI":"10.18653\/v1\/2021.naacl-main.280"},{"key":"19_CR4","doi-asserted-by":"crossref","unstructured":"Chng, C.K., et al.: ICDAR2019 robust reading challenge on arbitrary-shaped text - RRC-art. In: International Conference on Document Analysis and Recognition (ICDAR) (2019)","DOI":"10.1109\/ICDAR.2019.00252"},{"key":"19_CR5","doi-asserted-by":"crossref","unstructured":"Ch\u2019ng, C.K., Chan, C.S., Liu, C.: Total-text: towards orientation robustness in scene text detection. International Journal on Document Analysis and Recognition (IJDAR) (2020)","DOI":"10.1007\/s10032-019-00334-z"},{"key":"19_CR6","unstructured":"Conneau, A., Lample, G.: Crosslingual language model pretraining. In: Advances in Neural Information Processing Systems (NeurIPS) (2019)"},{"key":"19_CR7","doi-asserted-by":"crossref","unstructured":"Davis, B., Morse, B., Price, B., Tensmeyer, C., Wigington, C., Morariu, V.: End-to-end document recognition and understanding with dessurt. In: European Conference on Computer Vision (ECCV) (2022)","DOI":"10.1007\/978-3-031-25069-9_19"},{"key":"19_CR8","unstructured":"Deng, X., Shiralkar, P., Lockard, C., Huang, B., Sun, H.: Dom-lm: learning generalizable representations for html documents (2022). 
https:\/\/arxiv.org\/abs\/2201.10608"},{"key":"19_CR9","unstructured":"Devlin, J., Chang, M., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL) (2019)"},{"key":"19_CR10","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale. In: International Conference on Learning Representations (ICLR) (2021)"},{"key":"19_CR11","unstructured":"Goyal, P., et al.: Accurate, large minibatch SGD: training imagenet in 1 hour (2017). https:\/\/arxiv.org\/abs\/1706.02677"},{"key":"19_CR12","doi-asserted-by":"crossref","unstructured":"Gupta, A., Vedaldi, A., Zisserman, A.: Synthetic data for text localisation in natural images. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2016)","DOI":"10.1109\/CVPR.2016.254"},{"key":"19_CR13","doi-asserted-by":"crossref","unstructured":"Harley, A.W., Ufkes, A., Derpanis, K.G.: Evaluation of deep convolutional nets for document image classification and retrieval. In: International Conference on Document Analysis and Recognition (ICDAR) (2015)","DOI":"10.1109\/ICDAR.2015.7333910"},{"key":"19_CR14","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"19_CR15","unstructured":"Hong, T., Kim, D., Ji, M., Hwang, W., Nam, D., Park, S.: BROS: a pre-trained language model for understanding texts in document. In: AAAI Conference on Artificial Intelligence (AAAI) (2022)"},{"key":"19_CR16","doi-asserted-by":"crossref","unstructured":"Huang, Y., Lv, T., Cui, L., Lu, Y., Wei, F.: LayoutLMv3: pre-training for document AI with unified text and image masking. 
In: ACM International Conference on Multimedia (ACM MM) (2022)","DOI":"10.1145\/3503161.3548112"},{"key":"19_CR17","unstructured":"Hwang, W., et al.: Post-ocr parsing: building simple and robust parser via bio tagging. In: Workshop on Document Intelligence at NeurIPS (NeurIPS Workshop) (2019)"},{"key":"19_CR18","doi-asserted-by":"crossref","unstructured":"Hwang, W., Lee, H., Yim, J., Kim, G., Seo, M.: Cost-effective end-to-end information extraction for semi-structured document images. In: Empirical Methods in Natural Language Processing (EMNLP) (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.271"},{"key":"19_CR19","unstructured":"Jaderberg, M., Simonyan, K., Vedaldi, A., Zisserman, A.: Synthetic data and artificial neural networks for natural scene text recognition. In: International Conference on Neural Information Processing Systems Workshop (NIPS Workshop) (2014)"},{"key":"19_CR20","doi-asserted-by":"crossref","unstructured":"Jaume, G., Ekenel, H.K., Thiran, J.P.: Funsd: a dataset for form understanding in noisy scanned documents. In: ICDAR Workshop on Open Services and Tools for Document Analysis (ICDAR-OST) (2019)","DOI":"10.1109\/ICDARW.2019.10029"},{"key":"19_CR21","unstructured":"Junlong, L., Xu, Y., Cui, L., Wei, F.: Markuplm: pre-training of text and markup language for visually rich document understanding. In: Annual Meeting of the Association for Computational Linguistics (ACL) (2022)"},{"key":"19_CR22","doi-asserted-by":"crossref","unstructured":"Kim, G., et al.: Donut: Document understanding transformer without OCR. In: European Conference on Computer Vision (ECCV) (2022)","DOI":"10.1007\/978-3-031-19815-1_29"},{"key":"19_CR23","unstructured":"Kingma, D.P., Ba, J.: Adam: A method for stochastic optimization. In: International Conference on Learning Representations (ICLR) (2015)"},{"key":"19_CR24","unstructured":"Krylov, I., Nosov, S., Sovrasov, V.: Open images v5 text annotation and yet another mask text spotter. 
In: Asian Conference on Machine Learning (ACML) (2021)"},{"key":"19_CR25","doi-asserted-by":"crossref","unstructured":"Kuhn, H.W.: The hungarian method for the assignment problem. Naval Research Logistics Quarterly (NRLQ) (1955)","DOI":"10.1002\/nav.3800020109"},{"key":"19_CR26","unstructured":"Lee, K., et al.: Pix2struct: Screenshot parsing as pretraining for visual language understanding (2022). https:\/\/arxiv.org\/abs\/2210.03347"},{"key":"19_CR27","doi-asserted-by":"crossref","unstructured":"Lewis, D., Agam, G., Argamon, S., Frieder, O., Grossman, D., Heard, J.: Building a test collection for complex document information processing. In: International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR) (2006)","DOI":"10.1145\/1148170.1148307"},{"key":"19_CR28","doi-asserted-by":"crossref","unstructured":"Long, S., Qin, S., Panteleev, D., Bissacco, A., Fujii, Y., Raptis, M.: Towards end-to-end unified scene text detection and layout analysis. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2022)","DOI":"10.1109\/CVPR52688.2022.00112"},{"key":"19_CR29","unstructured":"Loshchilov, I., Hutter, F.: SGDR: Stochastic gradient descent with warm restarts. In: International Conference on Learning Representations (ICLR) (2017)"},{"key":"19_CR30","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. In: International Conference on Learning Representations (ICLR) (2019)"},{"key":"19_CR31","unstructured":"Maini, S., Groleau, A., Chee, K.W., Larson, S., Boarman, J.: Augraphy: a data augmentation library for document images (2022). https:\/\/arxiv.org\/abs\/2208.14558"},{"key":"19_CR32","doi-asserted-by":"crossref","unstructured":"Mathew, M., Bagal, V., Tito, R., Karatzas, D., Valveny, E., Jawahar, C.: InfographicVQA. 
In: IEEE Winter Conference on Applications of Computer Vision (WACV) (2022)","DOI":"10.1109\/WACV51458.2022.00264"},{"key":"19_CR33","unstructured":"Micikevicius, P., et al.: Mixed precision training. In: International Conference on Learning Representations (ICLR) (2018)"},{"key":"19_CR34","unstructured":"Mittelbach, F., Sch\u00f6pf, R.: With latex into the nineties. In: TUGboat (1989)"},{"key":"19_CR35","doi-asserted-by":"crossref","unstructured":"Nayef, N., et al.: ICDAR2019 robust reading challenge on multi-lingual scene text detection and recognition - RRC-MLT-2019. In: International Conference on Document Analysis and Recognition (ICDAR) (2019)","DOI":"10.1109\/ICDAR.2019.00254"},{"key":"19_CR36","doi-asserted-by":"crossref","unstructured":"Nayef, N., et al.: ICDAR2017 robust reading challenge on multi-lingual scene text detection and script identification - RRC-MLT. In: International Conference on Document Analysis and Recognition (ICDAR) (2017)","DOI":"10.1109\/ICDAR.2017.237"},{"key":"19_CR37","doi-asserted-by":"crossref","unstructured":"Singh, A., Pang, G., Toh, M., Huang, J., Galuba, W., Hassner, T.: TextOCR: towards large-scale end-to-end reasoning for arbitrary-shaped scene text. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2021)","DOI":"10.1109\/CVPR46437.2021.00869"},{"key":"19_CR38","doi-asserted-by":"crossref","unstructured":"Sun, Y., et al.: ICDAR 2019 competition on large-scale street view text with partial labeling - RRC-LSVT. 
In: International Conference on Document Analysis and Recognition (ICDAR) (2019)","DOI":"10.1109\/ICDAR.2019.00250"},{"key":"19_CR39","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"635","DOI":"10.1007\/978-3-030-86337-1_42","volume-title":"Document Analysis and Recognition - ICDAR 2021","author":"R Tito","year":"2021","unstructured":"Tito, R., Mathew, M., Jawahar, C.V., Valveny, E., Karatzas, D.: ICDAR 2021 competition on document visual question answering. In: Llad\u00f3s, J., Lopresti, D., Uchida, S. (eds.) ICDAR 2021. LNCS, vol. 12824, pp. 635\u2013649. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-86337-1_42"},{"key":"19_CR40","unstructured":"Vaswani, A., et al.: Attention is all you need. In: International Conference on Neural Information Processing Systems (NIPS) (2017)"},{"key":"19_CR41","doi-asserted-by":"crossref","unstructured":"Xu, Y., et al.: LayoutLMv2: multi-modal pre-training for visually-rich document understanding. In: Annual Meeting of the Association for Computational Linguistics (ACL) (2021)","DOI":"10.18653\/v1\/2021.acl-long.201"},{"key":"19_CR42","doi-asserted-by":"crossref","unstructured":"Xu, Y., Li, M., Cui, L., Huang, S., Wei, F., Zhou, M.: LayoutLM: pre-training of text and layout for document image understanding. In: Knowledge Discovery and Data Mining (KDD) (2019)","DOI":"10.1145\/3394486.3403172"},{"key":"19_CR43","unstructured":"Xu, Y., et al.: LayoutXLM: multimodal pre-training for multilingual visually-rich document understanding (2021). https:\/\/arxiv.org\/abs\/2104.08836"},{"key":"19_CR44","unstructured":"Yao, C., Bai, X., Liu, W., Ma, Y., Tu, Z.: Detecting texts of arbitrary orientations in natural images. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2012)"},{"key":"19_CR45","doi-asserted-by":"crossref","unstructured":"Yim, M., Kim, Y., Cho, H.C., Park, S.: SynthTIGER: synthetic text image generator towards better text recognition models. 
In: International Conference on Document Analysis and Recognition (ICDAR) (2021)","DOI":"10.1007\/978-3-030-86337-1_8"},{"key":"19_CR46","unstructured":"Yuliang, L., Lianwen, J., Zhang, S., Sheng, Z.: Detecting curve text in the wild: new dataset and new solution (2017). https:\/\/arxiv.org\/abs\/1712.02170"},{"issue":"6","key":"19_CR47","doi-asserted-by":"publisher","first-page":"1245","DOI":"10.1137\/0218082","volume":"18","author":"K Zhang","year":"1989","unstructured":"Zhang, K., Shasha, D.: Simple fast algorithms for the editing distance between trees and related problems. SIAM J. Comput. (SICOMP) 18(6), 1245\u20131262 (1989)","journal-title":"SIAM J. Comput. (SICOMP)"},{"key":"19_CR48","doi-asserted-by":"crossref","unstructured":"Zhong, X., ShafieiBavani, E., Jimeno Yepes, A.: Image-based table recognition: data, model, and evaluation. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.M. (eds.) European Conference on Computer Vision (ECCV) (2020)","DOI":"10.1007\/978-3-030-58589-1_34"}],"container-title":["Lecture Notes in Computer Science","Document Analysis and Recognition - ICDAR 2023"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-41682-8_19","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,18]],"date-time":"2023-08-18T07:20:09Z","timestamp":1692343209000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-41682-8_19"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031416811","9783031416828"],"references-count":48,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-41682-8_19","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"19 August 
2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICDAR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Document Analysis and Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"San Jos\u00e9, CA","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"USA","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 August 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 August 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icdar2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icdar2023.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Easychair","order":2,"name":"conference_management_system","label":"Conference 
Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"316","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"154","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"49% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.89","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1.50","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Number and type of other papers 
accepted : IJDAR track papers","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}