{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T17:03:29Z","timestamp":1777655009155,"version":"3.51.4"},"publisher-location":"Cham","reference-count":23,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030865481","type":"print"},{"value":"9783030865498","type":"electronic"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-86549-8_36","type":"book-chapter","created":{"date-parts":[[2021,9,4]],"date-time":"2021-09-04T02:05:57Z","timestamp":1630721157000},"page":"564-579","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":56,"title":["Kleister: Key Information Extraction Datasets Involving Long Documents with Complex Layouts"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1046-7563","authenticated-orcid":false,"given":"Tomasz","family":"Stanis\u0142awek","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8066-4533","authenticated-orcid":false,"given":"Filip","family":"Grali\u0144ski","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3407-7570","authenticated-orcid":false,"given":"Anna","family":"Wr\u00f3blewska","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0156-605X","authenticated-orcid":false,"given":"Dawid","family":"Lipi\u0144ski","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2856-8901","authenticated-orcid":false,"given":"Agnieszka","family":"Kaliska","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0231-5933","authenticated-orcid":false,"given":"Paulina","family":"Rosalska","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7126-9424","authenticated-orcid":false,"given":"Bartosz","family":"Topolski","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8423-1823","authenticated-orcid":false,"given":"Przemys\u0142aw","family":"Biecek","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,9,2]]},"reference":[{"key":"36_CR1","unstructured":"Akbik, A., Blythe, D., Vollgraf, R.: Contextual string embeddings for sequence labeling. In: Proceedings of the 27th International Conference on Computational Linguistics, pp. 1638\u20131649. Association for Computational Linguistics, Santa Fe, New Mexico, USA (August 2018), https:\/\/www.aclweb.org\/anthology\/C18-1139"},{"key":"36_CR2","unstructured":"Beltagy, I., Peters, M.E., Cohan, A.: Longformer: The long-document transformer. ArXiv arXiv:2004.05150 (2020)"},{"key":"36_CR3","doi-asserted-by":"crossref","unstructured":"Borchmann, L., et al.: Contract discovery: Dataset and a few-shot semantic retrieval challenge with competitive baselines. In: Cohn, T., He, Y., Liu, Y. (eds.) Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: Findings, EMNLP 2020, Online Event, 16\u201320 November 2020, pp. 4254\u20134268. Association for Computational Linguistics (2020)","DOI":"10.18653\/v1\/2020.findings-emnlp.380"},{"key":"36_CR4","doi-asserted-by":"crossref","unstructured":"Dai, Z., Yang, Z., Yang, Y., Carbonell, J., Le, Q., Salakhutdinov, R.: Transformer-xl: Attentive language models beyond a fixed-length context. In: Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics (2019). https:\/\/www.aclweb.org\/anthology\/P19-1285","DOI":"10.18653\/v1\/P19-1285"},{"key":"36_CR5","unstructured":"Devlin, J., Chang, M., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. ArXiv arXiv:1810.04805 (2018)"},{"key":"36_CR6","doi-asserted-by":"publisher","unstructured":"Dwojak, T., Pietruszka, M., Borchmann, \u0141., Ch\u0142\u0119dowski, J., Grali\u0144ski, F.: From dataset recycling to multi-property extraction and beyond. In: Proceedings of the 24th Conference on Computational Natural Language Learning, pp. 641\u2013651. Association for Computational Linguistics, Online (November 2020). https:\/\/doi.org\/10.18653\/v1\/2020.conll-1.52, https:\/\/www.aclweb.org\/anthology\/2020.conll-1.52","DOI":"10.18653\/v1\/2020.conll-1.52"},{"key":"36_CR7","doi-asserted-by":"crossref","unstructured":"Garncarek, \u0141., et al.: LAMBERT: Layout-Aware (Language) Modeling using BERT for information extraction. ArXiv arXiv:2002.08087 (2020)","DOI":"10.1007\/978-3-030-86549-8_34"},{"key":"36_CR8","doi-asserted-by":"crossref","unstructured":"Hewlett, D., et al.: WikiReading: a novel large-scale language understanding task over Wikipedia. In: Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 1535\u20131545. Association for Computational Linguistics, Berlin, Germany (2016)","DOI":"10.18653\/v1\/P16-1145"},{"key":"36_CR9","unstructured":"Holt, X., Chisholm, A.: Extracting structured data from invoices. In: Proceedings of the Australasian Language Technology Association Workshop 2018, pp. 53\u201359. Dunedin, New Zealand (December 2018). https:\/\/www.aclweb.org\/anthology\/U18-1006"},{"key":"36_CR10","unstructured":"Hugging Face: Transformers. https:\/\/github.com\/huggingface\/transformers (2020)"},{"key":"36_CR11","doi-asserted-by":"crossref","unstructured":"Jaume, G., Kemal Ekenel, H., Thiran, J.: FUNSD: A dataset for form understanding in noisy scanned documents. In: 2019 International Conference on Document Analysis and Recognition Workshops (ICDARW), vol. 2, pp. 1\u20136 (2019)","DOI":"10.1109\/ICDARW.2019.10029"},{"key":"36_CR12","doi-asserted-by":"crossref","unstructured":"Katti, A.R., Reisswig, C., Guder, C., Brarda, S., Bickel, S., H\u00f6hne, J., Faddoul, J.B.: Chargrid: Towards Understanding 2D Documents. ArXiv arXiv:1809.08799 (2018)","DOI":"10.18653\/v1\/D18-1476"},{"key":"36_CR13","doi-asserted-by":"crossref","unstructured":"Liu, X., Gao, F., Zhang, Q., Zhao, H.: Graph convolution for multimodal information extraction from visually rich documents. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics (2019). http:\/\/dx.doi.org\/10.18653\/v1\/N19-2005","DOI":"10.18653\/v1\/N19-2005"},{"key":"36_CR14","unstructured":"Liu, Y., et al.: RoBERTa: A Robustly Optimized BERT Pretraining Approach. ArXiv arXiv:1907.11692 (2019)"},{"key":"36_CR15","doi-asserted-by":"crossref","unstructured":"Mathew, M., Karatzas, D., Jawahar, C.V.: DocVQA: A Dataset for VQA on Document Images. ArXiv arXiv:2007.00398 (2021)","DOI":"10.1109\/WACV48630.2021.00225"},{"key":"36_CR16","doi-asserted-by":"crossref","unstructured":"Palm, R.B., Laws, F., Winther, O.: Attend, copy, parse end-to-end information extraction from documents. In: International Conference on Document Analysis and Recognition (ICDAR) (2019)","DOI":"10.1109\/ICDAR.2019.00060"},{"key":"36_CR17","doi-asserted-by":"publisher","unstructured":"Palm, R.B., Winther, O., Laws, F.: Cloudscan - a configuration-free invoice analysis system using recurrent neural networks. In: 14th IAPR International Conference on Document Analysis and Recognition (ICDAR) (2017). https:\/\/doi.org\/10.1109\/icdar.2017.74","DOI":"10.1109\/icdar.2017.74"},{"key":"36_CR18","unstructured":"Park, S., et al.: CORD: a consolidated receipt dataset for post-OCR parsing. In: Document Intelligence Workshop at Neural Information Processing Systems (2019)"},{"key":"36_CR19","unstructured":"Smith, R.: Tesseract Open Source OCR Engine (2020). https:\/\/github.com\/tesseract-ocr\/tesseract"},{"key":"36_CR20","doi-asserted-by":"crossref","unstructured":"Tjong Kim Sang, E.F., De Meulder, F.: Introduction to the CoNLL-2003 Shared Task: Language-Independent Named Entity Recognition. In: Proceedings of the Seventh Conference of the North American Chapter of the Association for Computational Linguistics (2003)","DOI":"10.3115\/1119176.1119195"},{"key":"36_CR21","series-title":"Lecture Notes in Business Information Processing","doi-asserted-by":"publisher","first-page":"200","DOI":"10.1007\/978-3-030-58779-6_14","volume-title":"Business Process Management: Blockchain and Robotic Process Automation Forum","author":"C Wellmann","year":"2020","unstructured":"Wellmann, C., Stierle, M., Dunzer, S., Matzner, M.: A framework to evaluate the viability of\u00a0robotic process automation for\u00a0business process activities. In: Asatiani, A., et al. (eds.) BPM 2020. LNBIP, vol. 393, pp. 200\u2013214. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58779-6_14"},{"key":"36_CR22","doi-asserted-by":"publisher","unstructured":"Wr\u00f3blewska, A., Stanis\u0142awek, T., Prus-Zaj\u0105czkowski, B., Garncarek, \u0141.: Robotic process automation of unstructured data with machine learning. In: Position Papers of the 2018 Federated Conference on Computer Science and Information Systems, FedCSIS 2018, Pozna\u0144, Poland, 9\u201312 September 2018, pp. 9\u201316 (2018). https:\/\/doi.org\/10.15439\/2018F373","DOI":"10.15439\/2018F373"},{"key":"36_CR23","doi-asserted-by":"publisher","unstructured":"Xu, Y., Li, M., Cui, L., Huang, S., Wei, F., Zhou, M.: LayoutLM: pre-training of text and layout for document image understanding. In: Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining (2020). https:\/\/doi.org\/10.1145\/3394486.3403172","DOI":"10.1145\/3394486.3403172"}],"container-title":["Lecture Notes in Computer Science","Document Analysis and Recognition \u2013 ICDAR 2021"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-86549-8_36","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,3]],"date-time":"2025-09-03T22:04:12Z","timestamp":1756937052000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-86549-8_36"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030865481","9783030865498"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-86549-8_36","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"2 September 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICDAR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Document Analysis and Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Lausanne","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Switzerland","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 September 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10 September 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icdar2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/iapr.org\/icdar2021","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"340","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"182","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"54% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.9","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4.9","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Additionally, 13 competition reports are included.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}