{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T09:55:24Z","timestamp":1743155724790,"version":"3.40.3"},"publisher-location":"Cham","reference-count":32,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030857127"},{"type":"electronic","value":"9783030857134"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-85713-4_19","type":"book-chapter","created":{"date-parts":[[2021,9,12]],"date-time":"2021-09-12T23:04:33Z","timestamp":1631487873000},"page":"196-205","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Towards Combining Object Detection and\u00a0Text Classification Models for\u00a0Form Entity Recognition"],"prefix":"10.1007","author":[{"given":"Mar\u00eda","family":"Villota","sequence":"first","affiliation":[]},{"given":"Gonzalo","family":"Santamar\u00eda","sequence":"additional","affiliation":[]},{"given":"C\u00e9sar","family":"Dom\u00ednguez","sequence":"additional","affiliation":[]},{"given":"J\u00f3nathan","family":"Heras","sequence":"additional","affiliation":[]},{"given":"Eloy","family":"Mata","sequence":"additional","affiliation":[]},{"given":"Vico","family":"Pascual","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,9,13]]},"reference":[{"key":"19_CR1","unstructured":"Alexey, A.B.: YOLO darknet (2018). https:\/\/github.com\/AlexeyAB\/darknet"},{"key":"19_CR2","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"833","DOI":"10.1007\/978-3-030-01234-2_49","volume-title":"Computer Vision \u2013 ECCV 2018","author":"L-C Chen","year":"2018","unstructured":"Chen, L.-C., Zhu, Y., Papandreou, G., Schroff, F., Adam, H.: Encoder-decoder with Atrous separable convolution for semantic image segmentation. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11211, pp. 833\u2013851. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01234-2_49"},{"key":"19_CR3","unstructured":"Chollet, F., et al.: Keras (2015). https:\/\/github.com\/fchollet\/keras"},{"key":"19_CR4","unstructured":"Colaboratory team: Google colaboratory (2017). https:\/\/colab.research.google.com"},{"key":"19_CR5","doi-asserted-by":"publisher","first-page":"647","DOI":"10.1007\/978-0-85729-859-1_20","volume-title":"Handbook of Document Image Processing and Recognition","author":"B Co\u00fcasnon","year":"2014","unstructured":"Co\u00fcasnon, B., Lemaitre, A.: Recognition of tables and forms. In: Doermann, D., Tombre, K. (eds.) Handbook of Document Image Processing and Recognition, pp. 647\u2013677. Springer, London (2014). https:\/\/doi.org\/10.1007\/978-0-85729-859-1_20"},{"key":"19_CR6","doi-asserted-by":"publisher","unstructured":"Devlin, J., et\u00a0al.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), pp. 4171\u20134186. Association for Computational Linguistics (2019). https:\/\/doi.org\/10.18653\/v1\/N19-1423","DOI":"10.18653\/v1\/N19-1423"},{"key":"19_CR7","unstructured":"Gilliam, W.: Blur: a library that integrates hugging face transformers with version 2 of the fastai framework (2021). https:\/\/github.com\/ohmeow\/blurr"},{"key":"19_CR8","doi-asserted-by":"publisher","unstructured":"Harley, A.W., Ufkes, A., Derpanis, K.G.: Evaluation of deep convolutional nets for document image classification and retrieval. In: Proceedings of the 13th International Conference on Document Analysis and Recognition (ICDAR 2015), pp. 991\u2013995 (2015). https:\/\/doi.org\/10.1109\/ICDAR.2015.7333910","DOI":"10.1109\/ICDAR.2015.7333910"},{"key":"19_CR9","unstructured":"Hong, T., et\u00a0al.: A pre-trained language model for understanding texts in document (2021). https:\/\/openreview.net\/forum?id=punMXQEsPr0"},{"key":"19_CR10","doi-asserted-by":"publisher","first-page":"108","DOI":"10.3390\/info11020108","volume":"11","author":"J Howard","year":"2020","unstructured":"Howard, J., Gugger, S.: FastAI: a layered API for deep learning. Information 11, 108 (2020). https:\/\/doi.org\/10.3390\/info11020108","journal-title":"Information"},{"key":"19_CR11","doi-asserted-by":"publisher","unstructured":"Howard, J., Ruder, S.: Universal language model fine-tuning for text classification. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 328\u2013339. Association for Computational Linguistics (2018). https:\/\/doi.org\/10.18653\/v1\/P18-1031","DOI":"10.18653\/v1\/P18-1031"},{"key":"19_CR12","doi-asserted-by":"publisher","unstructured":"Jaume, G., Ekenel, H.K., Thiran, J.P.: FUNSD: a dataset for form understanding in noisy scanned documents. In: Proceedings of the 2019 International Conference on Document Analysis and Recognition Workshops (ICDARW), vol.\u00a02, pp.\u00a01\u20136. IEEE (2019). https:\/\/doi.org\/10.1109\/ICDARW.2019.10029","DOI":"10.1109\/ICDARW.2019.10029"},{"key":"19_CR13","unstructured":"Lan, Z., et\u00a0al.: Albert: A lite BERT for self-supervised learning of language representations. In: International Conference on Learning Representations (2020)"},{"key":"19_CR14","doi-asserted-by":"publisher","unstructured":"Lewis, D., Agam, G., Argamon, S., Frieder, O., Grossman, D., Heard, J.: Building a test collection for complex document information processing. In: Proceedings of the 29th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR 2006), pp. 665\u2013666. Association for Computing Machinery, New York, NY, USA (2006). https:\/\/doi.org\/10.1145\/1148170.1148307","DOI":"10.1145\/1148170.1148307"},{"key":"19_CR15","unstructured":"Liu, Y., et\u00a0al.: Roberta: a robustly optimized BERT pretraining approach. CoRR abs\/1907.11692 (2019). http:\/\/arxiv.org\/abs\/1907.11692"},{"key":"19_CR16","unstructured":"Liu, Z., et al.: Swin transformer: hierarchical vision transformer using shifted windows. CoRR abs\/2103.14030 (2021). http:\/\/arxiv.org\/abs\/2103.14030"},{"key":"19_CR17","unstructured":"Paszke, A., et\u00a0al.: Pytorch: an imperative style, high-performance deep learning library. In: Advances in Neural Information Processing Systems, vol. 32, pp. 8024\u20138035. Curran Associates, Inc. (2019)"},{"key":"19_CR18","first-page":"91","volume":"28","author":"S Ren","year":"2015","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. Adv. Neural Inf. Process. Syst. 28, 91\u201399 (2015)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"19_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"234","DOI":"10.1007\/978-3-319-24574-4_28","volume-title":"Medical Image Computing and Computer-Assisted Intervention \u2013 MICCAI 2015","author":"O Ronneberger","year":"2015","unstructured":"Ronneberger, O., Fischer, P., Brox, T.: U-Net: convolutional networks for biomedical image segmentation. In: Navab, N., Hornegger, J., Wells, W.M., Frangi, A.F. (eds.) MICCAI 2015. LNCS, vol. 9351, pp. 234\u2013241. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-24574-4_28"},{"key":"19_CR20","unstructured":"Sanh, V., et\u00a0al.: DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter. CoRR abs\/1910.01108 (2019). http:\/\/arxiv.org\/abs\/1910.01108"},{"key":"19_CR21","doi-asserted-by":"publisher","unstructured":"Smith, R.: An overview of the tesseract OCR engine. In: Proceedings of the International Conference on Document Analysis and Recognition (ICDAR), vol.\u00a02, pp. 629\u2013633 (2007). https:\/\/doi.org\/10.1109\/ICDAR.2007.4376991","DOI":"10.1109\/ICDAR.2007.4376991"},{"key":"19_CR22","doi-asserted-by":"publisher","unstructured":"Tan, M., et\u00a0al.: EfficientDet: scalable and efficient object detection. In: Proceedings of the 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. IEEE (2020). https:\/\/doi.org\/10.1109\/CVPR42600.2020.01079","DOI":"10.1109\/CVPR42600.2020.01079"},{"key":"19_CR23","doi-asserted-by":"crossref","unstructured":"Tian, Z., Shen, C., Chen, H., et\u00a0al.: FCOS: fully convolutional one-stage object detection. CoRR abs\/1904.01355 (2019). http:\/\/arxiv.org\/abs\/1904.01355","DOI":"10.1109\/ICCV.2019.00972"},{"key":"19_CR24","unstructured":"Vaswani, A., et\u00a0al.: Attention is all you need. In: Advances in Neural Information Processing Systems, vol.\u00a030. Curran Associates, Inc. (2017)"},{"key":"19_CR25","unstructured":"Vazquez, L., et\u00a0al.: IceVision: an agnostic object detection framework (2020). https:\/\/github.com\/airctic\/icevision"},{"key":"19_CR26","unstructured":"Wang, C.Y., Bochkovskiy, A., Liao, H.Y.M.: Scaled-yolov4: scaling cross stage partial network. CoRR (2021). http:\/\/arxiv.org\/abs\/2011.08036"},{"key":"19_CR27","doi-asserted-by":"publisher","unstructured":"Wang, J., Sun, K., Cheng, T., et\u00a0al.: Deep high-resolution representation learning for visual recognition. IEEE Trans. Pattern Anal. Mach. Intell. (2020). https:\/\/doi.org\/10.1109\/tpami.2020.2983686","DOI":"10.1109\/tpami.2020.2983686"},{"key":"19_CR28","doi-asserted-by":"publisher","unstructured":"Wang, Z., Zhan, M., Liu, X., Liang, D.: DocStruct: a multimodal method to extract hierarchy structure in document for general form understanding. In: Proceedings of the Findings of the Association for Computational Linguistics (EMNLP 2020), pp. 898\u2013908. Association for Computational Linguistics (2020). https:\/\/doi.org\/10.18653\/v1\/2020.findings-emnlp.80","DOI":"10.18653\/v1\/2020.findings-emnlp.80"},{"key":"19_CR29","doi-asserted-by":"publisher","unstructured":"Wolf, T., et\u00a0al.: Transformers: state-of-the-art natural language processing. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations, pp. 38\u201345. Association for Computational Linguistics (2020). https:\/\/doi.org\/10.18653\/v1\/2020.emnlp-demos.6","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"key":"19_CR30","doi-asserted-by":"publisher","unstructured":"Xu, Y., Li, M., Cui, L., Huang, S., Wei, F., Zhou, M.: LayoutLM: pre-training of text and layout for document image understanding. In: Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD 2020), pp. 1192-1200. ACM (2020). https:\/\/doi.org\/10.1145\/3394486.3403172","DOI":"10.1145\/3394486.3403172"},{"key":"19_CR31","unstructured":"Xu, Y., et\u00a0al.: LayoutLMv2: multi-modal pre-training for visually-rich document understanding. CoRR abs\/2012.14740 (2020). http:\/\/arxiv.org\/abs\/2012.14740"},{"key":"19_CR32","doi-asserted-by":"crossref","unstructured":"Zhu, C., He, Y., Savvides, M.: Feature selective anchor-free module for single-shot object detection. CoRR abs\/1903.00621 (2019). http:\/\/arxiv.org\/abs\/1903.00621","DOI":"10.1109\/CVPR.2019.00093"}],"container-title":["Lecture Notes in Computer Science","Advances in Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-85713-4_19","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,9,12]],"date-time":"2021-09-12T23:11:02Z","timestamp":1631488262000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-85713-4_19"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030857127","9783030857134"],"references-count":32,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-85713-4_19","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"13 September 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"CAEPIA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Conference of the Spanish Association for Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"M\u00e1laga","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Spain","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 September 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24 September 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"caepia2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/caepia2020.uma.es\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"40","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"25","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"63% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The 2020 conference was cancelled due to COVID-19 and held together with CAEPIA 2021","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}