{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T07:46:15Z","timestamp":1767339975949,"version":"3.40.3"},"publisher-location":"Cham","reference-count":27,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031416781"},{"type":"electronic","value":"9783031416798"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-41679-8_12","type":"book-chapter","created":{"date-parts":[[2023,8,18]],"date-time":"2023-08-18T07:02:59Z","timestamp":1692342179000},"page":"205-220","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Information Extraction from\u00a0Documents: Question Answering Vs Token Classification in\u00a0Real-World Setups"],"prefix":"10.1007","author":[{"given":"Laurent","family":"Lam","sequence":"first","affiliation":[]},{"given":"Pirashanth","family":"Ratnamogan","sequence":"additional","affiliation":[]},{"given":"Jo\u00ebl","family":"Tang","sequence":"additional","affiliation":[]},{"given":"William","family":"Vanhuffel","sequence":"additional","affiliation":[]},{"given":"Fabien","family":"Caspani","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,8,19]]},"reference":[{"key":"12_CR1","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers) (2019). http:\/\/aclanthology.org\/N19-1423.pdf"},{"key":"12_CR2","doi-asserted-by":"publisher","unstructured":"Douzon, T., Duffner, S., Garcia, C., Espinas, J.: Improving information extraction on business documents with specific pre-training tasks. In: Document Analysis Systems 15th IAPR International Workshop, DAS 2022. LNCS, vol. 13237, pp. 111\u2013125. Springer International Publishing, La Rochelle, France (2022). https:\/\/doi.org\/10.1007\/978-3-031-06555-2_8, http:\/\/hal.archives-ouvertes.fr\/hal-03676134","DOI":"10.1007\/978-3-031-06555-2_8"},{"key":"12_CR3","unstructured":"Hendrycks, D., Burns, C., Chen, A., Ball, S.: CUAD: an expert-annotated NLP dataset for legal contract review. arXiv preprint arXiv:2103.06268 (2021)"},{"key":"12_CR4","doi-asserted-by":"publisher","unstructured":"Huang, Y., Lv, T., Cui, L., Lu, Y., Wei, F.: Layoutlmv3: pre-training for document AI with unified text and image masking. In: Proceedings of the 30th ACM International Conference on Multimedia, pp. 4083\u20134091. MM 2022, Association for Computing Machinery, New York, NY, USA (2022). https:\/\/doi.org\/10.1145\/3503161.3548112","DOI":"10.1145\/3503161.3548112"},{"key":"12_CR5","doi-asserted-by":"crossref","unstructured":"Huang, Z., et al.: ICDAR 2019 competition on scanned receipt OCR and information extraction, pp. 1516\u20131520 (2019). http:\/\/arxiv.org\/pdf\/2103.10213.pdf","DOI":"10.1109\/ICDAR.2019.00244"},{"key":"12_CR6","doi-asserted-by":"publisher","unstructured":"Jaume, G., Kemal Ekenel, H., Thiran, J.P.: FUNSD: a dataset for form understanding in noisy scanned documents. In: 2019 International Conference on Document Analysis and Recognition Workshops (ICDARW), vol. 2, pp. 1\u20136 (2019). https:\/\/doi.org\/10.1109\/ICDARW.2019.10029","DOI":"10.1109\/ICDARW.2019.10029"},{"key":"12_CR7","doi-asserted-by":"publisher","unstructured":"Kwiatkowski, T., et al.: Natural questions: a benchmark for question answering research. Trans. Assoc. Comput. Linguist. 7, 452\u2013466 (2019). https:\/\/doi.org\/10.1162\/tacl_a_00276, http:\/\/aclanthology.org\/Q19-1026","DOI":"10.1162\/tacl_a_00276"},{"key":"12_CR8","doi-asserted-by":"publisher","unstructured":"Li, X., Feng, J., Meng, Y., Han, Q., Wu, F., Li, J.: A unified MRC framework for named entity recognition (2019). https:\/\/doi.org\/10.48550\/ARXIV.1910.11476, http:\/\/arxiv.org\/abs\/1910.11476","DOI":"10.48550\/ARXIV.1910.11476"},{"key":"12_CR9","doi-asserted-by":"publisher","unstructured":"Mengge, X., Yu, B., Zhang, Z., Liu, T., Zhang, Y., Wang, B.: Coarse-to-fine pre-training for named entity recognition. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 6345\u20136354. Association for Computational Linguistics, November 2020. https:\/\/doi.org\/10.18653\/v1\/2020.emnlp-main.514, http:\/\/aclanthology.org\/2020.emnlp-main.514","DOI":"10.18653\/v1\/2020.emnlp-main.514"},{"key":"12_CR10","doi-asserted-by":"crossref","unstructured":"Oussaid, I., Vanhuffel, W., Ratnamogan, P., Hajaiej, M., Mathey, A., Gilles, T.: Information extraction from visually rich documents with font style embeddings. CoRR abs\/2111.04045 (2021). http:\/\/arxiv.org\/abs\/2111.04045","DOI":"10.1109\/ICPR56361.2022.9956120"},{"key":"12_CR11","unstructured":"Park, S., et al.: Cord: a consolidated receipt dataset for post-OCR parsing (2019)"},{"key":"12_CR12","doi-asserted-by":"publisher","unstructured":"Rajpurkar, P., Jia, R., Liang, P.: Know what you don\u2019t know: Unanswerable questions for SQuAD. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers), pp. 784\u2013789. Association for Computational Linguistics, Melbourne, Australia, July 2018. https:\/\/doi.org\/10.18653\/v1\/P18-2124, http:\/\/aclanthology.org\/P18-2124","DOI":"10.18653\/v1\/P18-2124"},{"key":"12_CR13","doi-asserted-by":"publisher","unstructured":"Rajpurkar, P., Zhang, J., Lopyrev, K., Liang, P.: SQuAD: 100,000+ questions for machine comprehension of text. In: Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing, pp. 2383\u20132392. Association for Computational Linguistics, Austin, Texas, November 2016. https:\/\/doi.org\/10.18653\/v1\/D16-1264, http:\/\/aclanthology.org\/D16-1264","DOI":"10.18653\/v1\/D16-1264"},{"key":"12_CR14","doi-asserted-by":"publisher","unstructured":"Ramshaw, L.A., Marcus, M.P.: Text chunking using transformation-based learning (1995). https:\/\/doi.org\/10.48550\/ARXIV.CMP-LG\/9505040, http:\/\/arxiv.org\/abs\/cmp-lg\/9505040","DOI":"10.48550\/ARXIV.CMP-LG\/9505040"},{"key":"12_CR15","unstructured":"Saha, A., Finegan-Dollak, C., Verma, A.: Position masking for improved layout-aware document understanding. CoRR abs\/2109.00442 (2021). http:\/\/arxiv.org\/abs\/2109.00442"},{"key":"12_CR16","doi-asserted-by":"publisher","unstructured":"Shrimal, A., Jain, A., Mehta, K., Yenigalla, P.: NER-MQMRC: formulating named entity recognition as multi question machine reading comprehension. In: Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Track, pp. 230\u2013238. Association for Computational Linguistics, Hybrid: Seattle, Washington + Online, July 2022. https:\/\/doi.org\/10.18653\/v1\/2022.naacl-industry.26","DOI":"10.18653\/v1\/2022.naacl-industry.26"},{"key":"12_CR17","unstructured":"Stanislawek, T., et al.: Kleister: key information extraction datasets involving long documents with complex layouts. CoRR abs\/2105.05796 (2021). http:\/\/arxiv.org\/abs\/2105.05796"},{"key":"12_CR18","unstructured":"Vu, H.M., Nguyen, D.T.N.: Revising FUNSD dataset for key-value detection in document images. arXiv preprint arXiv:2010.05322 (2020)"},{"key":"12_CR19","unstructured":"Vu, H.M., Nguyen, D.T.: Revising FUNSD dataset for key-value detection in document images. CoRR abs\/2010.05322 (2020). http:\/\/arxiv.org\/abs\/2010.05322"},{"key":"12_CR20","doi-asserted-by":"publisher","unstructured":"Wang, Q., et al.: Learning to extract attribute value from product via question answering: a multi-task approach. In: Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 47\u201355. KDD 2020, Association for Computing Machinery, New York, NY, USA (2020). https:\/\/doi.org\/10.1145\/3394486.3403047","DOI":"10.1145\/3394486.3403047"},{"key":"12_CR21","doi-asserted-by":"publisher","unstructured":"Wang, W.C., Mueller, J.: Detecting label errors in token classification data (2022). https:\/\/doi.org\/10.48550\/ARXIV.2210.03920, http:\/\/arxiv.org\/abs\/2210.03920","DOI":"10.48550\/ARXIV.2210.03920"},{"key":"12_CR22","doi-asserted-by":"publisher","unstructured":"Xu, Y., et al.: LayoutLMv2: multi-modal pre-training for visually-rich document understanding. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pp. 2579\u20132591. Association for Computational Linguistics, August 2021. https:\/\/doi.org\/10.18653\/v1\/2021.acl-long.201","DOI":"10.18653\/v1\/2021.acl-long.201"},{"key":"12_CR23","doi-asserted-by":"publisher","unstructured":"Xu, Y., Li, M., Cui, L., Huang, S., Wei, F., Zhou, M.: LayoutLM: pre-training of text and layout for document image understanding. In: Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 1192\u20131200. KDD 2020, Association for Computing Machinery, New York, NY, USA (2020). https:\/\/doi.org\/10.1145\/3394486.3403172","DOI":"10.1145\/3394486.3403172"},{"key":"12_CR24","unstructured":"Zaheer, M., et al.: Big bird: transformers for longer sequences. In: Larochelle, H., Ranzato, M., Hadsell, R., Balcan, M., Lin, H. (eds.) Advances in Neural Information Processing Systems. vol. 33, pp. 17283\u201317297. Curran Associates, Inc. (2020). www.proceedings.neurips.cc\/paper\/2020\/file\/c8512d142a2d849725f31a9a7a361ab9-Paper.pdf"},{"key":"12_CR25","unstructured":"Zaheer, M., et al.: Big bird: transformers for longer sequences. In: Larochelle, H., Ranzato, M., Hadsell, R., Balcan, M., Lin, H. (eds.) Advances in Neural Information Processing Systems. vol. 33, pp. 17283\u201317297. Curran Associates, Inc. (2020). www.proceedings.neurips.cc\/paper\/2020\/file\/c8512d142a2d849725f31a9a7a361ab9-Paper.pdf"},{"key":"12_CR26","doi-asserted-by":"publisher","unstructured":"Zeng, C., Li, S., Li, Q., Hu, J., Hu, J.: A survey on machine reading comprehension: tasks, evaluation metrics and benchmark datasets (2020). https:\/\/doi.org\/10.48550\/ARXIV.2006.11880, http:\/\/arxiv.org\/abs\/2006.11880","DOI":"10.48550\/ARXIV.2006.11880"},{"key":"12_CR27","doi-asserted-by":"publisher","unstructured":"Zhou, W., Chen, M.: Learning from noisy labels for entity-centric information extraction. In: Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, pp. 5381\u20135392. Association for Computational Linguistics, Online and Punta Cana, Dominican Republic, November 2021. https:\/\/doi.org\/10.18653\/v1\/2021.emnlp-main.437","DOI":"10.18653\/v1\/2021.emnlp-main.437"}],"container-title":["Lecture Notes in Computer Science","Document Analysis and Recognition - ICDAR 2023"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-41679-8_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,18]],"date-time":"2023-08-18T07:25:09Z","timestamp":1692343509000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-41679-8_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031416781","9783031416798"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-41679-8_12","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"19 August 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICDAR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Document Analysis and Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"San Jos\u00e9, CA","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"USA","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 August 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 August 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icdar2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icdar2023.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"316","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"154","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"49% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.89","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1.50","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Number and type of other papers accepted : IJDAR track papers","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}