{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,20]],"date-time":"2026-02-20T18:27:42Z","timestamp":1771612062491,"version":"3.50.1"},"publisher-location":"Cham","reference-count":29,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032046161","type":"print"},{"value":"9783032046178","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,9,15]],"date-time":"2025-09-15T00:00:00Z","timestamp":1757894400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,15]],"date-time":"2025-09-15T00:00:00Z","timestamp":1757894400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-04617-8_2","type":"book-chapter","created":{"date-parts":[[2025,9,15]],"date-time":"2025-09-15T11:46:04Z","timestamp":1757936764000},"page":"23-39","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["CM1 - A Dataset for\u00a0Evaluating Few-Shot Information Extraction with\u00a0Large Vision Language 
Models"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8842-3718","authenticated-orcid":false,"given":"Fabian","family":"Wolf","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8892-3306","authenticated-orcid":false,"given":"Oliver","family":"T\u00fcselmann","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0009-6028-7502","authenticated-orcid":false,"given":"Arthur","family":"Matei","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1516-8183","authenticated-orcid":false,"given":"Lukas","family":"Hennies","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9492-907X","authenticated-orcid":false,"given":"Christoph","family":"Rass","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7446-7813","authenticated-orcid":false,"given":"Gernot A.","family":"Fink","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,9,15]]},"reference":[{"key":"2_CR1","unstructured":"Bai, S., et al.: Qwen2.5-VL technical report. CoRR abs\/2502.13923 (2025)"},{"key":"2_CR2","unstructured":"Beyer, L., et al.: PaliGemma: A versatile 3B VLM for transfer. CoRR abs\/2407.07726 (2024)"},{"key":"2_CR3","unstructured":"Borggr\u00e4fe, H.: Exploring pathways of (forced) migration, resettlement structures, and displaced persons\u2019 agency. Hist. Soc. Res.\/Historische Sozialforschung 45(4), 45\u201368 (2020)"},{"key":"2_CR4","doi-asserted-by":"crossref","unstructured":"Boros, E., et al.: A comparison of sequential and combined approaches for named entity recognition in a corpus of handwritten medieval charters. In: Proceedings of the International Conference on Frontiers in Handwriting Recognition, pp. 79\u201384. Dortmund, Germany (2020)","DOI":"10.1109\/ICFHR2020.2020.00025"},{"key":"2_CR5","unstructured":"Brown, T., et al.: Language models are few-shot learners. 
In: Advances in Neural Information Processing Systems, vol.\u00a033, pp. 1877\u20131901. Virtual Event (2020)"},{"issue":"12","key":"2_CR6","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-024-4231-5","volume":"67","author":"Z Chen","year":"2024","unstructured":"Chen, Z., et al.: How far are we to GPT-4V? Closing the gap to commercial multimodal models with open-source suites. Sci. China Inf. Sci. 67(12), 220101 (2024)","journal-title":"Sci. China Inf. Sci."},{"key":"2_CR7","doi-asserted-by":"crossref","unstructured":"Coquenet, D., Chatelain, C., Paquet, T.: DAN: a segmentation-free document attention network for handwritten document recognition. IEEE Trans. Pattern Anal. Mach. Intell. 45(7), 8227\u20138243 (2023)","DOI":"10.1109\/TPAMI.2023.3235826"},{"key":"2_CR8","doi-asserted-by":"crossref","unstructured":"Davis, B.L., Morse, B.S., Price, B.L., Tensmeyer, C., Wigington, C., Morariu, V.I.: End-to-end document recognition and understanding with Dessurt. In: Proceedings of the European Conference on Computer Vision, pp. 280\u2013296. Tel Aviv, Israel (2022)","DOI":"10.1007\/978-3-031-25069-9_19"},{"key":"2_CR9","unstructured":"Dehghani, M., et al.: Patch n\u2019 pack: Navit, a vision transformer for any aspect ratio and resolution. In: Advances in Neural Information Processing Systems, vol.\u00a036, pp. 2252\u20132274. New Orleans, LA, USA (2023)"},{"key":"2_CR10","unstructured":"Dettmers, T., Pagnoni, A., Holtzman, A., Zettlemoyer, L.: QLoRA: efficient finetuning of quantized LLMs. In: Advances in Neural Information Processing Systems, vol.\u00a036, pp. 10088\u201310115. New Orleans, LA, USA (2023)"},{"key":"2_CR11","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale. In: International Conference on Learning Representations. 
Virtual Event, Austria (2021)"},{"key":"2_CR12","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778. Las Vegas, NV, USA (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"2_CR13","unstructured":"Hu, E.J., et al.: LoRA: low-rank adaptation of large language models. In: International Conference on Learning Representations. Virtual Event (2022)"},{"issue":"4","key":"2_CR14","doi-asserted-by":"publisher","first-page":"718","DOI":"10.1080\/01419870.2024.2404488","volume":"48","author":"S Huhn","year":"2025","unstructured":"Huhn, S., Rass, C.: Displaced person (s): the production of a powerful political category. Ethn. Racial Stud. 48(4), 718\u2013739 (2025)","journal-title":"Ethn. Racial Stud."},{"key":"2_CR15","doi-asserted-by":"crossref","unstructured":"Kim, G., et al.: OCR-free document understanding transformer. In: Proceedings of the European Conference on Computer Vision, pp. 498\u2013517. Tel Aviv, Israel (2022)","DOI":"10.1007\/978-3-031-19815-1_29"},{"key":"2_CR16","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. In: International Conference on Learning Representations. San Diego, CA, USA (2015)"},{"key":"2_CR17","unstructured":"Lee, K., et al.: Pix2Struct: screenshot parsing as pretraining for visual language understanding. In: International Conference on Machine Learning, pp. 18893\u201318912. Honolulu, HI, USA (2023)"},{"key":"2_CR18","doi-asserted-by":"crossref","unstructured":"Lewis, M., et al.: BART: denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. In: Annual Meeting of the Association for Computational Linguistics, pp. 7871\u20137880. 
Virtual Event (2020)","DOI":"10.18653\/v1\/2020.acl-main.703"},{"key":"2_CR19","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the International Conference on Computer Vision, pp. 9992\u201310002. Virtual Event (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"2_CR20","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. In: International Conference on Learning Representations. New Orleans, LA, USA (2019)"},{"key":"2_CR21","doi-asserted-by":"crossref","unstructured":"Mathew, M., Karatzas, D., Jawahar, C.V.: DocVQA: a dataset for VQA on document images. In: IEEE Winter Conf. on Applications of Computer Vision, pp. 2199\u20132208. Waikoloa, HI, USA (2022)","DOI":"10.1109\/WACV48630.2021.00225"},{"key":"2_CR22","unstructured":"Mesnard, T., et\u00a0al.: Gemma: Open models based on Gemini research and technology. CoRR abs\/2403.08295 (2024)"},{"key":"2_CR23","unstructured":"Rass, C., Tames, I.: Negotiating the aftermath of forced migration. Hist. Soc. Res.\/Historische Sozialforschung 45(4 (174)), 7\u201344 (2020)"},{"key":"2_CR24","doi-asserted-by":"crossref","unstructured":"Tarride, S., Boillet, M., Kermorvant, C.: Key-value information extraction from full handwritten pages. In: Proceedings of the International Conference on Document Analysis and Recognition, pp. 185\u2013204. San Jos\u00e9, CA, USA (2023)","DOI":"10.1007\/978-3-031-41679-8_11"},{"key":"2_CR25","doi-asserted-by":"crossref","unstructured":"Tarride, S., Lemaitre, A., Co\u00fcasnon, B., Tardivel, S.: A comparative study of information extraction strategies using an attention-based neural network. In: Proceedings of the International Workshop on Document Analysis Systems, pp. 644\u2013658. 
La Rochelle, France (2022)","DOI":"10.1007\/978-3-031-06555-2_43"},{"key":"2_CR26","doi-asserted-by":"crossref","unstructured":"Tarride, S., Maarand, M., Boillet, M., McGrath, J., Capel, E., V\u00e9zina, H., Kermorvant, C.: Large-scale genealogical information extraction from handwritten Quebec parish records. Int. J. Document Anal. Recognit. 26(3), 255\u2013272 (2023)","DOI":"10.1007\/s10032-023-00427-w"},{"key":"2_CR27","unstructured":"Yang, A., et al.: Qwen2.5 technical report. CoRR abs\/2412.15115 (2024)"},{"key":"2_CR28","doi-asserted-by":"crossref","unstructured":"Yin, S., et al.: A survey on multimodal large language models. Nat. Sci. Rev. 11(12), nwae403 (2024)","DOI":"10.1093\/nsr\/nwae403"},{"key":"2_CR29","doi-asserted-by":"crossref","unstructured":"Zhai, X., Mustafa, B., Kolesnikov, A., Beyer, L.: Sigmoid loss for language image pre-training. In: Proceedings of the International Conference on Computer Vision, pp. 11941\u201311952. Paris, France (2023)","DOI":"10.1109\/ICCV51070.2023.01100"}],"container-title":["Lecture Notes in Computer Science","Document Analysis and Recognition \u2013 ICDAR 2025"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-04617-8_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,15]],"date-time":"2025-09-15T11:46:13Z","timestamp":1757936773000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-04617-8_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,15]]},"ISBN":["9783032046161","9783032046178"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-04617-8_2","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,9,15]]},"assertion":[{"value":"15 September 
2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICDAR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Document Analysis and Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Wuhan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icdar2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/iapr.org\/icdar2025","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}