{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T06:11:36Z","timestamp":1758089496628,"version":"3.44.0"},"publisher-location":"Cham","reference-count":29,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783032046239"},{"type":"electronic","value":"9783032046246"}],"license":[{"start":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T00:00:00Z","timestamp":1758067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T00:00:00Z","timestamp":1758067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-04624-6_33","type":"book-chapter","created":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T05:33:12Z","timestamp":1758000792000},"page":"563-576","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["DocAnnot - Accelerating the\u00a0Creation of\u00a0Key Information Extraction Datasets with\u00a0GenAI-Powered Auto-annotation"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-0791-4808","authenticated-orcid":false,"given":"Siddartha","family":"Reddy","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1844-9503","authenticated-orcid":false,"given":"P. M.","family":"Harikrishnan","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7668-8482","authenticated-orcid":false,"given":"Goutham","family":"Vignesh","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0005-3598-266X","authenticated-orcid":false,"given":"V.","family":"Varun","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0005-0214-5056","authenticated-orcid":false,"given":"Vishal","family":"Vaddina","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,9,17]]},"reference":[{"key":"33_CR1","unstructured":"The claude 3 model family: Opus, sonnet, haiku. https:\/\/api.semanticscholar.org\/CorpusID:268232499"},{"key":"33_CR2","unstructured":"Achiam, J., et al.: GPT-4 technical report. arXiv preprint arXiv:2303.08774 (2023)"},{"key":"33_CR3","doi-asserted-by":"crossref","unstructured":"Appalaraju, S., Tang, P., Dong, Q., Sankaran, N., Zhou, Y., Manmatha, R.: Docformerv2: local features for document understanding (2023). https:\/\/arxiv.org\/abs\/2306.01733","DOI":"10.1609\/aaai.v38i2.27828"},{"key":"33_CR4","doi-asserted-by":"crossref","unstructured":"Appalaraju, S., Tang, P., Dong, Q., Sankaran, N., Zhou, Y., Manmatha, R.: Docformerv2: local features for document understanding. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a038, pp. 709\u2013718 (2024)","DOI":"10.1609\/aaai.v38i2.27828"},{"key":"33_CR5","unstructured":"Bai, S., et al.: Qwen2. 5-vl technical report. arXiv preprint arXiv:2502.13923 (2025)"},{"key":"33_CR6","unstructured":"Bensch, O., Popa, M., Spille, C.: Key information extraction from documents: evaluation and generator (2021). https:\/\/arxiv.org\/abs\/2106.14624"},{"key":"33_CR7","doi-asserted-by":"crossref","unstructured":"Cao, P., Wang, Y., Zhang, Q., Meng, Z.: Genkie: robust generative multimodal document key information extraction. In: Conference on Empirical Methods in Natural Language Processing (2023). https:\/\/api.semanticscholar.org\/CorpusID:264487294","DOI":"10.18653\/v1\/2023.findings-emnlp.979"},{"key":"33_CR8","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: Bert: pre-training of deep bidirectional transformers for language understanding (2019). https:\/\/arxiv.org\/abs\/1810.04805"},{"key":"33_CR9","unstructured":"Goel, A., et al.: LLMs accelerate annotation for medical information extraction. In: Machine Learning for Health (ML4H), pp. 82\u2013100. PMLR (2023)"},{"key":"33_CR10","doi-asserted-by":"publisher","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997). https:\/\/doi.org\/10.1162\/neco.1997.9.8.1735","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"33_CR11","doi-asserted-by":"publisher","unstructured":"Huang, Y., Lv, T., Cui, L., Lu, Y., Wei, F.: Layoutlmv3: pre-training for document AI with unified text and image masking. In: Proceedings of the 30th ACM International Conference on Multimedia, MM 2022, pp. 4083\u20134091. Association for Computing Machinery, New York (2022). https:\/\/doi.org\/10.1145\/3503161.3548112","DOI":"10.1145\/3503161.3548112"},{"key":"33_CR12","doi-asserted-by":"publisher","unstructured":"Huang, Z., et al.: ICDAR2019 competition on scanned receipt OCR and information extraction. In: 2019 International Conference on Document Analysis and Recognition (ICDAR). IEEE (2019). https:\/\/doi.org\/10.1109\/icdar.2019.00244","DOI":"10.1109\/icdar.2019.00244"},{"key":"33_CR13","doi-asserted-by":"crossref","unstructured":"Jaume, G., Ekenel, H.K., Thiran, J.P.: FUNSD: a dataset for form understanding in noisy scanned documents (2019). https:\/\/arxiv.org\/abs\/1905.13538","DOI":"10.1109\/ICDARW.2019.10029"},{"key":"33_CR14","doi-asserted-by":"publisher","unstructured":"Kim, S., et al.: DocKD: knowledge distillation from LLMs for open-world document understanding models. In: Al-Onaizan, Y., Bansal, M., Chen, Y.N. (eds.) Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, Miami, Florida, USA, pp. 3167\u20133193. Association for Computational Linguistics (2024). https:\/\/doi.org\/10.18653\/v1\/2024.emnlp-main.185. https:\/\/aclanthology.org\/2024.emnlp-main.185\/","DOI":"10.18653\/v1\/2024.emnlp-main.185"},{"key":"33_CR15","first-page":"34892","volume":"36","author":"H Liu","year":"2023","unstructured":"Liu, H., Li, C., Wu, Q., Lee, Y.J.: Visual instruction tuning. Adv. Neural. Inf. Process. Syst. 36, 34892\u201334916 (2023)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"33_CR16","doi-asserted-by":"crossref","unstructured":"Mathew, M., Karatzas, D., Jawahar, C.V.: Docvqa: a dataset for VQA on document images (2021). https:\/\/arxiv.org\/abs\/2007.00398","DOI":"10.1109\/WACV48630.2021.00225"},{"key":"33_CR17","unstructured":"M\u00fcller, R., Kornblith, S., Hinton, G.E.: When does label smoothing help? In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"33_CR18","doi-asserted-by":"crossref","unstructured":"Naparstek, O., et al.: KVP10k: a comprehensive dataset for key-value pair extraction in business documents. In: International Conference on Document Analysis and Recognition, pp. 97\u2013116. Springer (2024)","DOI":"10.1007\/978-3-031-70533-5_7"},{"key":"33_CR19","unstructured":"O\u2019Shea, K., Nash, R.: An introduction to convolutional neural networks (2015). https:\/\/arxiv.org\/abs\/1511.08458"},{"key":"33_CR20","doi-asserted-by":"publisher","unstructured":"Panda, S., Behera, V., Pradhan, A., Mohanty, A.: A rule-based information extraction system. Int. J. Innov. Technol. Explor. Eng. 8, 1613\u20131617 (2019). https:\/\/doi.org\/10.35940\/ijitee.I8156.078919","DOI":"10.35940\/ijitee.I8156.078919"},{"key":"33_CR21","unstructured":"Park, S., et al.: CORD: a consolidated receipt dataset for post-OCR parsing. In: Workshop on Document Intelligence at NeurIPS 2019 (2019). https:\/\/openreview.net\/forum?id=SJl3z659UH"},{"key":"33_CR22","unstructured":"Rouzegar, H., Makrehchi, M.: Enhancing text classification through LLM-driven active learning and human annotation (2024). https:\/\/arxiv.org\/abs\/2406.12114"},{"key":"33_CR23","doi-asserted-by":"crossref","unstructured":"Skalick\u1ef3, M., \u0160imsa, \u0160., U\u0159i\u010d\u00e1\u0159, M., \u0160ulc, M.: Business document information extraction: towards practical benchmarks. In: International Conference of the Cross-Language Evaluation Forum for European Languages, pp. 105\u2013117. Springer (2022)","DOI":"10.1007\/978-3-031-13643-6_8"},{"key":"33_CR24","unstructured":"Tang, Z., et al.: Unifying vision, text, and layout for universal document processing (2023). https:\/\/arxiv.org\/abs\/2212.02623"},{"key":"33_CR25","unstructured":"Team, G., et al.: Gemini: a family of highly capable multimodal models. arXiv preprint arXiv:2312.11805 (2023)"},{"key":"33_CR26","unstructured":"Vaswani, A., et al.: Attention is all you need (2023). https:\/\/arxiv.org\/abs\/1706.03762"},{"key":"33_CR27","doi-asserted-by":"crossref","unstructured":"Xu, Y., et al.: Layoutlmv2: multi-modal pre-training for visually-rich document understanding (2022). https:\/\/arxiv.org\/abs\/2012.14740","DOI":"10.18653\/v1\/2021.acl-long.201"},{"key":"33_CR28","doi-asserted-by":"publisher","unstructured":"Xu, Y., Li, M., Cui, L., Huang, S., Wei, F., Zhou, M.: Layoutlm: pre-training of text and layout for document image understanding. In: Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, KDD 2020, pp. 1192\u20131200. ACM (2020). https:\/\/doi.org\/10.1145\/3394486.3403172","DOI":"10.1145\/3394486.3403172"},{"key":"33_CR29","unstructured":"Zhang, Q., et al.: Document parsing unveiled: techniques, challenges, and prospects for structured information extraction (2024). https:\/\/arxiv.org\/abs\/2410.21169"}],"container-title":["Lecture Notes in Computer Science","Document Analysis and Recognition \u2013 ICDAR 2025"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-04624-6_33","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T05:33:21Z","timestamp":1758000801000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-04624-6_33"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,17]]},"ISBN":["9783032046239","9783032046246"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-04624-6_33","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025,9,17]]},"assertion":[{"value":"17 September 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICDAR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Document Analysis and Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Wuhan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icdar2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/iapr.org\/icdar2025","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}