{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,13]],"date-time":"2026-05-13T02:00:51Z","timestamp":1778637651609,"version":"3.51.4"},"publisher-location":"Cham","reference-count":26,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031724398","type":"print"},{"value":"9783031724404","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-72440-4_1","type":"book-chapter","created":{"date-parts":[[2024,9,24]],"date-time":"2024-09-24T17:01:50Z","timestamp":1727197310000},"page":"3-12","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Exploring the\u00a0Capabilities of\u00a0GPT4-Vision as\u00a0OCR Engine"],"prefix":"10.1007","author":[{"given":"Alex","family":"Ghiriti","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9417-5316","authenticated-orcid":false,"given":"Wolfgang","family":"G\u00f6derle","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0202-6100","authenticated-orcid":false,"given":"Roman","family":"Kern","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,9,25]]},"reference":[{"key":"1_CR1","unstructured":"Brown, T.B., et\u00a0al.: Language models are few-shot learners. In: Advances in Neural Information Processing Systems, vol. 33, pp. 1877\u20131901 (2020)"},{"key":"1_CR2","unstructured":"Touvron, H., et\u00a0al.: LLAMA 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)"},{"key":"1_CR3","doi-asserted-by":"crossref","unstructured":"Smith, R.: An overview of the tesseract OCR engine. In: Ninth International Conference on Document Analysis and Recognition (ICDAR 2007), vol.\u00a02, pp. 629\u2013633. IEEE (2007)","DOI":"10.1109\/ICDAR.2007.4376991"},{"key":"1_CR4","doi-asserted-by":"crossref","unstructured":"Smith, R.W.: History of the tesseract OCR engine: what worked and what didn\u2019t. In Document Recognition and Retrieval XX, vol. 8658, p. 865802. SPIE (2013)","DOI":"10.1117\/12.2010051"},{"key":"1_CR5","doi-asserted-by":"publisher","unstructured":"Boros, E., Nguyen, N.K., Lejeune, G., Doucet, A.: Assessing the impact of OCR noise on multilingual event detection over digitised documents. Int. J. Digit. Lib., 1\u201326 (2022). https:\/\/doi.org\/10.1007\/s00799-022-00325-2","DOI":"10.1007\/s00799-022-00325-2"},{"key":"1_CR6","unstructured":"Poncelas, A., Aboomar, M., Buts, J., Hadley, J., Way, A.: A tool for facilitating OCR postediting in historical documents. In: LREC 2020 Workshop Language Resources and Evaluation Conference 11\u201316 May 2020, p.\u00a047 (2020)"},{"key":"1_CR7","doi-asserted-by":"publisher","unstructured":"Hegghammer, T.: OCR with Tesseract, Amazon Textract, and Google Document AI: a benchmarking experiment. J. Comput. Soc. Sci., 1\u201322 (2021). https:\/\/doi.org\/10.1007\/s42001-021-00149-1","DOI":"10.1007\/s42001-021-00149-1"},{"issue":"1","key":"1_CR8","doi-asserted-by":"publisher","first-page":"97","DOI":"10.21248\/jlcl.33.2018.220","volume":"33","author":"U Springmann","year":"2018","unstructured":"Springmann, U., Reul, C., Dipper, S., Baiter, J.: Ground truth for training OCR engines on historical documents in German fraktur and early modern Latin. J. Lang. Technol. Comput. Linguist. 33(1), 97\u2013114 (2018)","journal-title":"J. Lang. Technol. Comput. Linguist."},{"key":"1_CR9","doi-asserted-by":"crossref","unstructured":"Neudecker, C., et al.: OCR-D: An end-to-end open source OCR framework for historical printed documents. In: Proceedings of the 3rd International Conference on Digital Access to Textual Cultural Heritage, pp. 53\u201358 (2019)","DOI":"10.1145\/3322905.3322917"},{"key":"1_CR10","unstructured":"Fleischhacker, D., Goederle, W., Kern, R.: Improving OCR quality in 19th century historical documents using a combined machine learning based approach. arXiv preprint arXiv:2401.07787 (2024)"},{"key":"1_CR11","doi-asserted-by":"publisher","unstructured":"Naiman, J.P., Cosillo, M.G., Williams, P.K.G., Goodman, A. (2023). Large Synthetic Data from the ar $$\\chi $$ iv for OCR Post Correction of Historic Scientific Articles. In: Alonso, O., Cousijn, H., Silvello, G., Marrero, M., Teixeira Lopes, C., Marchesin, S. (eds.) Linking Theory and Practice of Digital Libraries. TPDL 2023. LNCS, vol. 14241, pp. 265\u2013274. Springer, Cham (2023). https:\/\/doi.org\/10.1007\/978-3-031-43849-3_23","DOI":"10.1007\/978-3-031-43849-3_23"},{"key":"1_CR12","unstructured":"Yang, Z., et al: The dawn of LMMs: preliminary explorations with GPT-4V(ision). arXiv preprint arXiv:2309.17421, 9(1):1, 2023"},{"key":"1_CR13","unstructured":"Zhang, X., et al.: GPT-4V(ision) as a generalist evaluator for vision-language tasks. arXiv preprint arXiv:2311.01361 (2023)"},{"key":"1_CR14","unstructured":"Liu, Y., et al.: On the hidden mystery of OCR in large multimodal models. http:\/\/arxiv.org\/abs\/2305.07895"},{"key":"1_CR15","unstructured":"Shi, Y., et al.: Exploring OCR capabilities of GPT-4V(ision) : a quantitative and in-depth evaluation. http:\/\/arxiv.org\/abs\/2310.16809"},{"key":"1_CR16","unstructured":"Claude. https:\/\/skpu.unipu.hr\/skpu\/de\/istrische_zeitungen_zeitschriften_und_jahrbcher_in_deutscher_sprache_1848-1943"},{"key":"1_CR17","first-page":"37","volume":"62","author":"G Marauschek","year":"1988","unstructured":"Marauschek, G.: Die stadt graz und die steirischen landesfremdenverkehrsvereine. Blaetter f\u00fcr Heimatkunde 62, 37\u201348 (1988)","journal-title":"Blaetter f\u00fcr Heimatkunde"},{"key":"1_CR18","doi-asserted-by":"crossref","unstructured":"Mader, B.: \u201cnaturschutzpark meleda\u201d 1910-1915. das alt\u00f6sterreichische projekt zur errichtung eines naturreservates auf der s\u00fcddalmatinischen insel meleda (mljet) als vorl\u00e4ufer des zeitgen\u00f6ssischen nationalparks. Anzeiger der philosophisch-historischen Klasse 140, 5\u201322 (2006)","DOI":"10.1553\/anzeiger140_1s5"},{"key":"1_CR19","unstructured":"Jenkins, F., Kanai, J., Nartker, T.: Using ideal images to establish a baseline of OCR performance. 47\u201354 (1993)"},{"key":"1_CR20","doi-asserted-by":"publisher","unstructured":"Zhu, W., Sokhandan, N., Yang, G., Martin, S., Sathyanarayana, S.: DocBed: A multi-stage OCR solution for documents with complex layouts. 36(11), 12643\u201312649. ISSN 2374-3468. https:\/\/doi.org\/10.1609\/aaai.v36i11.21539. Number: 11","DOI":"10.1609\/aaai.v36i11.21539"},{"key":"1_CR21","doi-asserted-by":"publisher","unstructured":"Lu, T., Dooms, A.: Towards physical distortion identification and removal in document images. In: 2018 7th European Workshop on Visual Information Processing (EUVIP), pp. 1\u20136. https:\/\/doi.org\/10.1109\/EUVIP.2018.8611786. ISSN: 2471-8963","DOI":"10.1109\/EUVIP.2018.8611786"},{"key":"1_CR22","doi-asserted-by":"publisher","unstructured":"Manwatkar, P.M., Singh, K.R.: A technical review on text recognition from images. In: 2015 IEEE 9th International Conference on Intelligent Systems and Control (ISCO), pp. 1\u20135 (2015). https:\/\/doi.org\/10.1109\/ISCO.2015.7282362","DOI":"10.1109\/ISCO.2015.7282362"},{"key":"1_CR23","doi-asserted-by":"publisher","unstructured":"Nagabhushan, P.,\u00a0Nirmala, S.: Text extraction in complex color document images for enhanced readability. 2(2), 120 (2010). https:\/\/doi.org\/10.4236\/iim.2010.22015, http:\/\/www.scirp.org\/journal\/PaperInformation.aspx?PaperID=1409abstract. Number: 02 Publisher: Scientific Research Publishing","DOI":"10.4236\/iim.2010.22015"},{"key":"1_CR24","doi-asserted-by":"publisher","unstructured":"Jirasuwankul, N.: Effect of text orientation to OCR error and anti-skew of text using projective transform technique. In: 2011 IEEE\/ASME International Conference on Advanced Intelligent Mechatronics (AIM), pp. 856\u2013861. https:\/\/doi.org\/10.1109\/AIM.2011.6027057, https:\/\/ieeexplore.ieee.org\/abstract\/document\/6027057. ISSN: 2159-6255","DOI":"10.1109\/AIM.2011.6027057"},{"key":"1_CR25","doi-asserted-by":"publisher","unstructured":"Journet, N., Visani, M., Mansencal, B., Van-Cuong, K., Billy, A.: DocCreator: a new software for creating synthetic ground-truthed document images. 3(4), 62. ISSN 2313-433X. https:\/\/doi.org\/10.3390\/jimaging3040062, https:\/\/www.mdpi.com\/2313-433X\/3\/4\/62. Number: 4 Publisher: Multidisciplinary Digital Publishing Institute","DOI":"10.3390\/jimaging3040062"},{"key":"1_CR26","unstructured":"Peyrard, C.: Single image super-resolution based on neural networks for text and face recognition. https:\/\/theses.hal.science\/tel-01974040"}],"container-title":["Lecture Notes in Computer Science","Linking Theory and Practice of Digital Libraries"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72440-4_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,24]],"date-time":"2024-09-24T17:02:04Z","timestamp":1727197324000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72440-4_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031724398","9783031724404"],"references-count":26,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72440-4_1","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"25 September 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"TPDL","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Theory and Practice of Digital Libraries","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Ljubljana","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Slovenia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"tpdl2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/tpdl2024.nuk.si\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}