{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T23:20:35Z","timestamp":1761175235201,"version":"build-2065373602"},"reference-count":24,"publisher":"Polish Information Processing Society","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"DOI":"10.15439\/2025f0064","type":"proceedings-article","created":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T08:15:38Z","timestamp":1761120938000},"page":"145-152","source":"Crossref","is-referenced-by-count":0,"title":["Towards a German VET Archive and its Integration into a\nData Warehouse"],"prefix":"10.15439","volume":"45","author":[{"given":"Thomas","family":"Reiser","sequence":"first","affiliation":[{"name":"University of Koblenz"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Petra","family":"Steiner","sequence":"additional","affiliation":[{"name":"Federal Institute for Vocational Education and Training"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kristine","family":"Hein","sequence":"additional","affiliation":[{"name":"Federal Institute for Vocational Education and Training"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"6175","published-online":{"date-parts":[[2025,10,15]]},"reference":[{"key":"ref1","doi-asserted-by":"crossref","unstructured":"T. Reiser, J. D\u00f6rpinghaus, P. Steiner, and M. Tiemann, \u201cTowards a\ndatatset of digitalized historical german vet and cvet regulations,\u201d Data,\nvol. 9, no. 11, 2024.","DOI":"10.3390\/data9110128"},{"key":"ref2","unstructured":"T. Reiser, J. D\u00f6rpinghaus, and P. Steiner, \u201cAnalyzing historical legal\ntextcorpora: German vet and cvet regulations,\u201d in INFORMATIK 2024.\nGesellschaft f\u00fcr Informatik eV, 2024, pp. 2007\u20132018."},{"key":"ref3","unstructured":"T. Reiser, J. D\u00f6rpinghaus, and P. Steiner, \u201cLearning from historical vet and cvet regulations in germany:\nWhat should vet look like and whom should it serve?\u201d in NORDYRK\n2024 BOOK OF ABSTRACTS, 2024, p. 75."},{"key":"ref4","unstructured":"M. Koistinen, K. Kettunen, and J. Kervinen, \u201cHow to Improve Optical\nCharacter Recognition of Historical Finnish Newspapers Using Open\nSource Tesseract OCR Engine,\u201d Proc. of LTC, pp. 279\u2013283, 2017."},{"key":"ref5","unstructured":"A. Nabizai and H.-G. Fill, \u201cEine Modellierungsmethode zur\nVisualisierung und Analyse von Gesetzestexten,\u201d Jusletter IT, February\n2017. [Online]. Available: http:\/\/eprints.cs.univie.ac.at\/5131\/"},{"key":"ref6","doi-asserted-by":"publisher","unstructured":"V. N. Sai Rakesh Kamisetty, B. Sohan Chidvilas, S. Revathy, P. Jeyanthi,\nV. M. Anu, and L. Mary Gladence, \u201cDigitization of Data from Invoice\nusing OCR,\u201d in 2022 6th International Conference on Computing\nMethodologies and Communication (ICCMC), 2022. https:\/\/dx.doi.org\/10.1109\/ICCMC53470.2022.9754117 pp. 1\u201310.","DOI":"10.1109\/ICCMC53470.2022.9754117"},{"key":"ref7","doi-asserted-by":"publisher","unstructured":"H. Hamann, \u201cThe German Federal Courts Dataset 1950\u20132019: From\nPaper Archives to Linked Open Data,\u201d Journal of empirical legal studies,\nvol. 16, no. 3, pp. 671\u2013688, 2019. https:\/\/dx.doi.org\/10.1111\/jels.12230","DOI":"10.1111\/jels.12230"},{"key":"ref8","doi-asserted-by":"publisher","unstructured":"C. Reul, D. Christ, A. Hartelt, N. Balbach, M. Wehner, U. Springmann, \nC. Wick, C. Grundig, A. B\u00fcttner, and F. Puppe, \u201cOCR4all\u2014An Open-Source Tool Providing a (Semi-)Automatic OCR Workflow for Histor- \nical Printings,\u201d Applied Sciences, vol. 9, no. 22, p. 4853, 2019. https:\/\/dx.doi.org\/10.3390\/app9224853","DOI":"10.3390\/app9224853"},{"key":"ref9","doi-asserted-by":"publisher","unstructured":"J. M. Jayoma, E. S. Moyon, and E. M. O. Morales, \u201cOCR Based \nDocument Archiving and Indexing Using PyTesseract: A Record Management System for DSWD Caraga, Philippines,\u201d in 2020 IEEE 12th \nInternational Conference on Humanoid, Nanotechnology, Information \nTechnology, Communication and Control, Environment, and Management (HNICEM), 2020. https:\/\/dx.doi.org\/10.1109\/HNICEM51456.2020.9400000 pp. \n1\u20136.","DOI":"10.1109\/HNICEM51456.2020.9400000"},{"key":"ref10","doi-asserted-by":"publisher","unstructured":"S. Van Nguyen, D. A. Nguyen, and L. S. Q. Pham, \u201cDigitalization of \nAdministrative Documents A Digital Transformation Step in Practice,\u201d \nin 2021 8th NAFOSTED Conference on Information and Computer \nScience (NICS), 2021. https:\/\/dx.doi.org\/10.1109\/NICS54270.2021.9701547 pp. 519\u2013524.","DOI":"10.1109\/NICS54270.2021.9701547"},{"key":"ref11","doi-asserted-by":"publisher","unstructured":"S. Tsujimoto and H. Asada, \u201cMajor components of a complete text \nreading system,\u201d Proceedings of the IEEE, vol. 80, no. 7, pp. 1133\u20131149, 1992. https:\/\/dx.doi.org\/10.1109\/5.156475","DOI":"10.1109\/5.156475"},{"key":"ref12","doi-asserted-by":"publisher","unstructured":"J. v. Beusekom, D. Keysers, F. Shafait, and T. Breuel, \u201cExample-based \nlogical labeling of document title page images,\u201d in Ninth International \nConference on Document Analysis and Recognition (ICDAR 2007), \nvol. 2, 2007. https:\/\/dx.doi.org\/10.1109\/ICDAR.2007.4377049 pp. 919\u2013923.","DOI":"10.1109\/ICDAR.2007.4377049"},{"key":"ref13","doi-asserted-by":"publisher","unstructured":"S. Klink and T. Kieninger, \u201cRule-based document structure understanding with a fuzzy combination of layout and textual features,\u201d \nInternational Journal on Document Analysis and Recognition, vol. 4, \nno. 1, pp. 18\u201326, 2001. https:\/\/dx.doi.org\/10.1007\/PL00013570","DOI":"10.1007\/PL00013570"},{"key":"ref14","doi-asserted-by":"publisher","unstructured":"P. Pathirana, A. Silva, T. Lawrence, T. Weerasinghe, and \nR. Abeyweera, \u201cA comparative evaluation of pdf-to-html conversion \ntools,\u201d in 2023 International Research Conference on Smart \nComputing and Systems Engineering (SCSE), vol. 6, 2023. https:\/\/dx.doi.org\/10.1109\/SCSE59836.2023.10214989 pp. 1\u20137.","DOI":"10.1109\/SCSE59836.2023.10214989"},{"key":"ref15","doi-asserted-by":"publisher","unstructured":"P. Lopez, \u201cGrobid: Combining automatic bibliographic data recognition \nand term extraction for scholarship publications,\u201d in Research and \nAdvanced Technology for Digital Libraries, M. Agosti, J. Borbinha, \nS. Kapidakis, C. Papatheodorou, and G. Tsakonas, Eds. Berlin, Heidelberg: Springer Berlin Heidelberg, 2009. https:\/\/dx.doi.org\/10.1007\/978-3-642-04346-8_62. ISBN 978-3-642-04346-8 pp. 473\u2013474.","DOI":"10.1007\/978-3-642-04346-8_62"},{"key":"ref16","doi-asserted-by":"publisher","unstructured":"R. Altenh\u00f6ner, A. Berger, C. Bracht, P. Klimpel, S. Meyer, \nA. Neuburger, T. St\u00e4cker, and R. Stein, \u201cDFG-Praxisregeln \n\"Digitalisierung\". Aktualisierte Fassung 2022.\u201d Feb. 2023. [Online]. \nAvailable: https:\/\/doi.org\/10.5281\/zenodo.7435724","DOI":"10.5281\/zenodo.7435724"},{"key":"ref17","doi-asserted-by":"publisher","unstructured":"W. Meier, \u201cexist: An open source native xml database,\u201d in Web, Web-Services, and Database Systems, A. B. Chaudhri, M. Jeckle, E. Rahm, \nand R. Unland, Eds. Berlin, Heidelberg: Springer Berlin Heidelberg, \n2003. https:\/\/dx.doi.org\/10.1007\/3-540-36560-5_13. ISBN 978-3-540-36560-0 pp. 169\u2013183.","DOI":"10.1007\/3-540-36560-5_13"},{"key":"ref18","doi-asserted-by":"crossref","unstructured":"P. Christen, Data Matching: Concepts and Techniques for Record Linkage, Entity Resolution, and Duplicate Detection. Springer Publishing \nCompany, Incorporated, 2012. ISBN 3642311636","DOI":"10.1007\/978-3-642-31164-2"},{"key":"ref19","unstructured":"R. Altenh\u00f6ner, A. Berger, C. Bracht, P. Klimpel, S. Meyer, A. Neuburger, \nT. St\u00e4cker, and R. Stein, \u201cDFG practical guidelines on digitisation. \nupdated version 2022,\u201d 2023."},{"key":"ref20","doi-asserted-by":"publisher","unstructured":"M. S\u00f6ylemez, B. Tekinerdogan, and A. Koluk\u0131sa Tarhan, \u201cChallenges \nand solution directions of microservice architectures: A systematic \nliterature review,\u201d Applied Sciences, vol. 12, no. 11, 2022. https:\/\/dx.doi.org\/10.3390\/app12115507. [Online]. Available: https:\/\/www.mdpi.com\/2076-3417\/12\/11\/5507","DOI":"10.3390\/app12115507"},{"key":"ref21","doi-asserted-by":"crossref","unstructured":"B. Kim, S. Nakamura, and H. Watanave, \u201cUsing archivematica and \nomeka s for long-term preservation and access of digitized archive materials,\u201d in From Born-Physical to Born-Virtual: Augmenting Intelligence \nin Digital Libraries, Y.-H. Tseng, M. Katsurai, and H. N. Nguyen, Eds. \nCham: Springer International Publishing, 2022, pp. 241\u2013250.","DOI":"10.1007\/978-3-031-21756-2_20"},{"key":"ref22","unstructured":"M. Klindt and K. Amrhein, \u201cOne core preservation system for all \nyour data. no exceptions!\u201d in iPRES 2015 - Proceedings of the 12th \nInternational Conference on Preservation of Digital Objects, 2015, pp. \n101 \u2013 108. [Online]. Available: http:\/\/phaidra.univie.ac.at\/o:429551"},{"key":"ref23","doi-asserted-by":"crossref","unstructured":"J. D\u00f6rpinghaus and M. Tiemann, \u201cVocational education and training data \nin twitter: Making german twitter data interoperable,\u201d Proceedings of the \nAssociation for Information Science and Technology, vol. 60, no. 1, pp. \n946\u2013948, 2023.","DOI":"10.1002\/pra2.907"},{"key":"ref24","unstructured":"M. Bolanowski, M. Ganzha, L. Maciaszek, M. Paprzycki, and D. Slezak, \nEds., Communication Papers of the 19th Conference on Computer \nScience and Intelligence Systems (FedCSIS), 2024."}],"event":{"name":"20th Conference on Computer Science and Intelligence Systems (FedCSIS)","theme":"Computer Science and Intelligence Systems","location":"Krak\u00f3w, Poland","acronym":"FedCSIS","number":"20","start":{"date-parts":[[2025,9,14]]},"end":{"date-parts":[[2025,9,17]]}},"container-title":["Annals of Computer Science and Information Systems","Communication Papers of the 20th Conference on Computer Science and Intelligence Systems (FedCSIS)"],"original-title":[],"deposited":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T08:16:45Z","timestamp":1761121005000},"score":1,"resource":{"primary":{"URL":"https:\/\/annals-csis.org\/Volume_45\/drp\/0064.html"}},"subtitle":[],"proceedings-subject":"Computer Science and Information Systems","short-title":[],"issued":{"date-parts":[[2025,10,15]]},"references-count":24,"URL":"https:\/\/doi.org\/10.15439\/2025f0064","relation":{},"ISSN":["2300-5963"],"issn-type":[{"value":"2300-5963","type":"print"}],"subject":[],"published":{"date-parts":[[2025,10,15]]}}}